From 4b986de5fb4fd84ab247f4d33328d1b2de7a2d73 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 25 Apr 2019 14:45:17 +0200
Subject: [PATCH 01/73] Prepare native CUDA hybrid integration

---
 OpenCL/inc_common.cl            |   31 +-
 OpenCL/inc_platform.cl          |   44 ++
 OpenCL/inc_platform.h           |   24 +
 OpenCL/inc_types.h              |    4 +
 OpenCL/inc_vendor.h             |   21 +-
 OpenCL/m00000_a3-optimized.cl   |    1 +
 OpenCL/m02500-pure.cl           |    4 +
 include/{opencl.h => backend.h} |   39 +-
 include/ext_cuda.h              | 1042 +++++++++++++++++++++++++++++++
 include/types.h                 |    8 +-
 src/Makefile                    |    4 +-
 src/autotune.c                  |   12 +-
 src/{opencl.c => backend.c}     |  596 +++++++++++-------
 src/dispatch.c                  |   52 +-
 src/emu_inc_platform.c          |   11 +
 src/ext_cuda.c                  |    8 +
 src/hashcat.c                   |   94 +--
 src/hashes.c                    |    2 +-
 src/hwmon.c                     |  138 ++--
 src/interface.c                 |    2 +-
 src/main.c                      |    8 +-
 src/monitor.c                   |   16 +-
 src/mpsp.c                      |   10 +-
 src/outfile.c                   |    2 +-
 src/selftest.c                  |    8 +-
 src/status.c                    |  120 ++--
 src/stdout.c                    |    2 +-
 src/terminal.c                  |   30 +-
 src/user_options.c              |    2 +-
 29 files changed, 1825 insertions(+), 510 deletions(-)
 create mode 100644 OpenCL/inc_platform.cl
 create mode 100644 OpenCL/inc_platform.h
 rename include/{opencl.h => backend.h} (88%)
 create mode 100644 include/ext_cuda.h
 rename src/{opencl.c => backend.c} (92%)
 create mode 100644 src/emu_inc_platform.c
 create mode 100644 src/ext_cuda.c

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index d497d349e..a87649b3e 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 
 /**
@@ -1415,8 +1416,8 @@ DECLSPEC int is_valid_hex_8 (const u8 v)
 {
   // direct lookup table is slower thanks to CMOV
 
-  if ((v >= '0') && (v <= '9')) return 1;
-  if ((v >= 'a') && (v <= 'f')) return 1;
+  if ((v >= (u8) '0') && (v <= (u8) '9')) return 1;
+  if ((v >= (u8) 'a') && (v <= (u8) 'f')) return 1;
 
   return 0;
 }
@@ -1433,10 +1434,10 @@ DECLSPEC int is_valid_hex_32 (const u32 v)
 
 DECLSPEC int is_valid_base58_8 (const u8 v)
 {
-  if (v > 'z') return 0;
-  if (v < '1') return 0;
-  if ((v > '9') && (v < 'A')) return 0;
-  if ((v > 'Z') && (v < 'a')) return 0;
+  if (v > (u8) 'z') return 0;
+  if (v < (u8) '1') return 0;
+  if ((v > (u8) '9') && (v < (u8) 'A')) return 0;
+  if ((v > (u8) 'Z') && (v < (u8) 'a')) return 0;
 
   return 1;
 }
@@ -60860,7 +60861,23 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
 
   if (gid >= gid_max) return;
 
-  buf[gid] = (uint4) (value);
+  uint4 r;
+
+  #if   defined IS_NATIVE
+  r = value;
+  #elif defined IS_OPENCL
+  r.s0 = value;
+  r.s1 = value;
+  r.s2 = value;
+  r.s3 = value;
+  #elif defined IS_CUDA
+  r.x = value;
+  r.y = value;
+  r.z = value;
+  r.w = value;
+  #endif
+
+  buf[gid] = r;
 }
 
 KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max)
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
new file mode 100644
index 000000000..5c6d9780e
--- /dev/null
+++ b/OpenCL/inc_platform.cl
@@ -0,0 +1,60 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+
+#ifdef IS_NATIVE
+#endif
+
+#ifdef IS_CUDA
+
+/**
+ * OpenCL-style helpers implemented with CUDA builtins
+ */
+
+// subtract 1 from *p atomically, hand back what atomicSub() returns
+
+DECLSPEC u32 atomic_dec (u32 *p)
+{
+  return atomicSub (p, 1);
+}
+
+// add 1 to *p atomically, hand back what atomicAdd() returns
+
+DECLSPEC u32 atomic_inc (u32 *p)
+{
+  return atomicAdd (p, 1);
+}
+
+// flat thread index along x across the whole grid, dimindx is ignored
+
+DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
+{
+  // widen first: blockDim.x and blockIdx.x are 32 bit, their product may not fit
+
+  return ((size_t) blockDim.x * blockIdx.x) + threadIdx.x;
+}
+
+// thread index along x inside the block, dimindx is ignored
+
+DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
+{
+  return threadIdx.x;
+}
+
+// number of threads along x in the block, dimindx is ignored
+
+DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
+{
+  // verify
+  return blockDim.x;
+}
+
+#endif
+
+#ifdef IS_OPENCL
+#endif
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
new file mode 100644
index 000000000..fd3d310d1
--- /dev/null
+++ b/OpenCL/inc_platform.h
@@ -0,0 +1,35 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _INC_PLATFORM_H
+#define _INC_PLATFORM_H
+
+#ifdef IS_CUDA
+
+// prototypes for the OpenCL-style helpers used when the kernels are compiled as CUDA
+
+DECLSPEC u32    atomic_dec      (u32 *p);
+DECLSPEC u32    atomic_inc      (u32 *p);
+DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
+DECLSPEC uint4  uint4_init      (const u32 a);
+DECLSPEC uint4  uint4_init      (const u32 a, const u32 b, const u32 c, const u32 d);
+DECLSPEC __inline__ u8    rotate (const u8  v, const int i);
+DECLSPEC __inline__ u32   rotate (const u32 v, const int i);
+DECLSPEC __inline__ u64   rotate (const u64 v, const int i);
+
+// from here on rotate() is a macro; the rotate width is taken from the operand type
+// instead of a fixed 32 so that 64 bit operands are rotated correctly
+
+#define rotate(a,n) (((a) << (n)) | ((a) >> ((sizeof (a) * 8) - (n))))
+
+// bitwise select: takes the bits of b where c is set and the bits of a elsewhere
+
+#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
+
+#endif // IS_CUDA
+
+#endif // _INC_PLATFORM_H
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index b9eb3bd03..0e254c24a 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -6,6 +6,10 @@
 #ifndef _INC_TYPES_H
 #define _INC_TYPES_H
 
+#ifdef IS_CUDA
+typedef unsigned char uchar;
+#endif
+
 #ifdef KERNEL_STATIC
 typedef uchar  u8;
 typedef ushort u16;
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index ba85cbc74..b2bbd9037 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -6,12 +6,25 @@
 #ifndef _INC_VENDOR_H
 #define _INC_VENDOR_H
 
-#ifdef _CPU_OPENCL_EMU_H
+#if defined _CPU_OPENCL_EMU_H
+#define IS_NATIVE
+#elif defined __CUDACC__
+#define IS_CUDA
+#else
+#define IS_OPENCL
+#endif
+
+#if defined IS_NATIVE
 #define CONSTANT_AS
 #define GLOBAL_AS
 #define LOCAL_AS
 #define KERNEL_FQ
-#else
+#elif defined IS_CUDA
+#define CONSTANT_AS
+#define GLOBAL_AS
+#define LOCAL_AS
+#define KERNEL_FQ   __global__
+#elif defined IS_OPENCL
 #define CONSTANT_AS __constant
 #define GLOBAL_AS   __global
 #define LOCAL_AS    __local
@@ -90,11 +103,15 @@
 #if defined IS_CPU
 #define DECLSPEC inline
 #elif defined IS_GPU
+#if defined IS_CUDA
+#define DECLSPEC __device__
+#else
 #if defined IS_AMD
 #define DECLSPEC inline static
 #else
 #define DECLSPEC
 #endif
+#endif
 #else
 #define DECLSPEC
 #endif
diff --git a/OpenCL/m00000_a3-optimized.cl b/OpenCL/m00000_a3-optimized.cl
index 72403afb8..ef27d52f1 100644
--- a/OpenCL/m00000_a3-optimized.cl
+++ b/OpenCL/m00000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index a4ebce4d5..8066d3298 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -679,7 +679,11 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
     s_te4[i] = te4[i];
   }
 
+  #ifdef IS_CUDA
+  __syncthreads();
+  #else
   barrier (CLK_LOCAL_MEM_FENCE);
+  #endif
 
   #else
 
diff --git a/include/opencl.h b/include/backend.h
similarity index 88%
rename from include/opencl.h
rename to include/backend.h
index bd45111c2..9b25a3496 100644
--- a/include/opencl.h
+++ b/include/backend.h
@@ -3,8 +3,8 @@
  * License.....: MIT
  */
 
-#ifndef _OPENCL_H
-#define _OPENCL_H
+#ifndef _BACKEND_H
+#define _BACKEND_H
 
 #include <stdio.h>
 #include <errno.h>
@@ -22,8 +22,11 @@ static const char CL_VENDOR_MESA[]            = "Mesa";
 static const char CL_VENDOR_NV[]              = "NVIDIA Corporation";
 static const char CL_VENDOR_POCL[]            = "The pocl project";
 
-int  ocl_init  (hashcat_ctx_t *hashcat_ctx);
-void ocl_close (hashcat_ctx_t *hashcat_ctx);
+int  cuda_init  (hashcat_ctx_t *hashcat_ctx);
+void cuda_close (hashcat_ctx_t *hashcat_ctx);
+
+int  ocl_init   (hashcat_ctx_t *hashcat_ctx);
+void ocl_close  (hashcat_ctx_t *hashcat_ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
@@ -82,20 +85,20 @@ void generate_cached_kernel_mp_filename  (const u32 opti_type, const u64 opts_ty
 void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
 void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_dir, const char *device_name_chksum, char *cached_file);
 
-int  opencl_ctx_init                  (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_destroy               (hashcat_ctx_t *hashcat_ctx);
+int  backend_ctx_init                  (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_destroy               (hashcat_ctx_t *hashcat_ctx);
 
-int  opencl_ctx_devices_init          (hashcat_ctx_t *hashcat_ctx, const int comptime);
-void opencl_ctx_devices_destroy       (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_sync_tuning   (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_update_power  (hashcat_ctx_t *hashcat_ctx);
-void opencl_ctx_devices_kernel_loops  (hashcat_ctx_t *hashcat_ctx);
+int  backend_ctx_devices_init          (hashcat_ctx_t *hashcat_ctx, const int comptime);
+void backend_ctx_devices_destroy       (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_sync_tuning   (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_update_power  (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_kernel_loops  (hashcat_ctx_t *hashcat_ctx);
 
-int  opencl_session_begin             (hashcat_ctx_t *hashcat_ctx);
-void opencl_session_destroy           (hashcat_ctx_t *hashcat_ctx);
-void opencl_session_reset             (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_mp         (hashcat_ctx_t *hashcat_ctx);
-int  opencl_session_update_mp_rl      (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
+int  backend_session_begin             (hashcat_ctx_t *hashcat_ctx);
+void backend_session_destroy           (hashcat_ctx_t *hashcat_ctx);
+void backend_session_reset             (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_mp         (hashcat_ctx_t *hashcat_ctx);
+int  backend_session_update_mp_rl      (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
 
-#endif // _OPENCL_H
+#endif // _BACKEND_H
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
new file mode 100644
index 000000000..d51fd2286
--- /dev/null
+++ b/include/ext_cuda.h
@@ -0,0 +1,1042 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_CUDA_H
+#define _EXT_CUDA_H
+
+/**
+ * from cuda.h (/usr/local/cuda-10.1/targets/x86_64-linux/include/cuda.h)
+ */
+
+#define __CUDA_API_VERSION 10010
+
+/**
+ * CUDA device pointer
+ * CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
+ */
+#if __CUDA_API_VERSION >= 3020
+
+#if defined(_WIN64) || defined(__LP64__)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+#endif /* __CUDA_API_VERSION >= 3020 */
+
+typedef int CUdevice;                                     /**< CUDA device */
+typedef struct CUctx_st *CUcontext;                       /**< CUDA context */
+typedef struct CUevent_st *CUevent;                       /**< CUDA event */
+typedef struct CUfunc_st *CUfunction;                     /**< CUDA function */
+typedef struct CUmod_st *CUmodule;                        /**< CUDA module */
+typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
+
+typedef enum cudaError_enum {
+    /**
+     * The API call returned with no errors. In the case of query calls, this
+     * also means that the operation being queried is complete (see
+     * ::cuEventQuery() and ::cuStreamQuery()).
+     */
+    CUDA_SUCCESS                              = 0,
+
+    /**
+     * This indicates that one or more of the parameters passed to the API call
+     * is not within an acceptable range of values.
+     */
+    CUDA_ERROR_INVALID_VALUE                  = 1,
+
+    /**
+     * The API call failed because it was unable to allocate enough memory to
+     * perform the requested operation.
+     */
+    CUDA_ERROR_OUT_OF_MEMORY                  = 2,
+
+    /**
+     * This indicates that the CUDA driver has not been initialized with
+     * ::cuInit() or that initialization has failed.
+     */
+    CUDA_ERROR_NOT_INITIALIZED                = 3,
+
+    /**
+     * This indicates that the CUDA driver is in the process of shutting down.
+     */
+    CUDA_ERROR_DEINITIALIZED                  = 4,
+
+    /**
+     * This indicates profiler is not initialized for this run. This can
+     * happen when the application is running with external profiling tools
+     * like visual profiler.
+     */
+    CUDA_ERROR_PROFILER_DISABLED              = 5,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to attempt to enable/disable the profiling via ::cuProfilerStart or
+     * ::cuProfilerStop without initialization.
+     */
+    CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to call cuProfilerStart() when profiling is already enabled.
+     */
+    CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7,
+
+    /**
+     * \deprecated
+     * This error return is deprecated as of CUDA 5.0. It is no longer an error
+     * to call cuProfilerStop() when profiling is already disabled.
+     */
+    CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8,
+
+    /**
+     * This indicates that no CUDA-capable devices were detected by the installed
+     * CUDA driver.
+     */
+    CUDA_ERROR_NO_DEVICE                      = 100,
+
+    /**
+     * This indicates that the device ordinal supplied by the user does not
+     * correspond to a valid CUDA device.
+     */
+    CUDA_ERROR_INVALID_DEVICE                 = 101,
+
+
+    /**
+     * This indicates that the device kernel image is invalid. This can also
+     * indicate an invalid CUDA module.
+     */
+    CUDA_ERROR_INVALID_IMAGE                  = 200,
+
+    /**
+     * This most frequently indicates that there is no context bound to the
+     * current thread. This can also be returned if the context passed to an
+     * API call is not a valid handle (such as a context that has had
+     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
+     * mixes different API versions (i.e. 3010 context with 3020 API calls).
+     * See ::cuCtxGetApiVersion() for more details.
+     */
+    CUDA_ERROR_INVALID_CONTEXT                = 201,
+
+    /**
+     * This indicates that the context being supplied as a parameter to the
+     * API call was already the active context.
+     * \deprecated
+     * This error return is deprecated as of CUDA 3.2. It is no longer an
+     * error to attempt to push the active context via ::cuCtxPushCurrent().
+     */
+    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
+
+    /**
+     * This indicates that a map or register operation has failed.
+     */
+    CUDA_ERROR_MAP_FAILED                     = 205,
+
+    /**
+     * This indicates that an unmap or unregister operation has failed.
+     */
+    CUDA_ERROR_UNMAP_FAILED                   = 206,
+
+    /**
+     * This indicates that the specified array is currently mapped and thus
+     * cannot be destroyed.
+     */
+    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
+
+    /**
+     * This indicates that the resource is already mapped.
+     */
+    CUDA_ERROR_ALREADY_MAPPED                 = 208,
+
+    /**
+     * This indicates that there is no kernel image available that is suitable
+     * for the device. This can occur when a user specifies code generation
+     * options for a particular CUDA source file that do not include the
+     * corresponding device configuration.
+     */
+    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
+
+    /**
+     * This indicates that a resource has already been acquired.
+     */
+    CUDA_ERROR_ALREADY_ACQUIRED               = 210,
+
+    /**
+     * This indicates that a resource is not mapped.
+     */
+    CUDA_ERROR_NOT_MAPPED                     = 211,
+
+    /**
+     * This indicates that a mapped resource is not available for access as an
+     * array.
+     */
+    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
+
+    /**
+     * This indicates that a mapped resource is not available for access as a
+     * pointer.
+     */
+    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
+
+    /**
+     * This indicates that an uncorrectable ECC error was detected during
+     * execution.
+     */
+    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
+
+    /**
+     * This indicates that the ::CUlimit passed to the API call is not
+     * supported by the active device.
+     */
+    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
+
+    /**
+     * This indicates that the ::CUcontext passed to the API call can
+     * only be bound to a single CPU thread at a time but is already
+     * bound to a CPU thread.
+     */
+    CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
+
+    /**
+     * This indicates that peer access is not supported across the given
+     * devices.
+     */
+    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
+
+    /**
+     * This indicates that a PTX JIT compilation failed.
+     */
+    CUDA_ERROR_INVALID_PTX                    = 218,
+
+    /**
+     * This indicates an error with OpenGL or DirectX context.
+     */
+    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
+
+    /**
+    * This indicates that an uncorrectable NVLink error was detected during the
+    * execution.
+    */
+    CUDA_ERROR_NVLINK_UNCORRECTABLE           = 220,
+
+    /**
+    * This indicates that the PTX JIT compiler library was not found.
+    */
+    CUDA_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
+
+    /**
+     * This indicates that the device kernel source is invalid.
+     */
+    CUDA_ERROR_INVALID_SOURCE                 = 300,
+
+    /**
+     * This indicates that the file specified was not found.
+     */
+    CUDA_ERROR_FILE_NOT_FOUND                 = 301,
+
+    /**
+     * This indicates that a link to a shared object failed to resolve.
+     */
+    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+
+    /**
+     * This indicates that initialization of a shared object failed.
+     */
+    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
+
+    /**
+     * This indicates that an OS call failed.
+     */
+    CUDA_ERROR_OPERATING_SYSTEM               = 304,
+
+    /**
+     * This indicates that a resource handle passed to the API call was not
+     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
+     */
+    CUDA_ERROR_INVALID_HANDLE                 = 400,
+
+    /**
+     * This indicates that a resource required by the API call is not in a
+     * valid state to perform the requested operation.
+     */
+    CUDA_ERROR_ILLEGAL_STATE                  = 401,
+
+    /**
+     * This indicates that a named symbol was not found. Examples of symbols
+     * are global/constant variable names, texture names, and surface names.
+     */
+    CUDA_ERROR_NOT_FOUND                      = 500,
+
+    /**
+     * This indicates that asynchronous operations issued previously have not
+     * completed yet. This result is not actually an error, but must be indicated
+     * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
+     * may return this value include ::cuEventQuery() and ::cuStreamQuery().
+     */
+    CUDA_ERROR_NOT_READY                      = 600,
+
+    /**
+     * While executing a kernel, the device encountered a
+     * load or store instruction on an invalid memory address.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_ILLEGAL_ADDRESS                = 700,
+
+    /**
+     * This indicates that a launch did not occur because it did not have
+     * appropriate resources. This error usually indicates that the user has
+     * attempted to pass too many arguments to the device kernel, or the
+     * kernel launch specifies too many threads for the kernel's register
+     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
+     * when a 32-bit int is expected) is equivalent to passing too many
+     * arguments and can also result in this error.
+     */
+    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
+
+    /**
+     * This indicates that the device kernel took too long to execute. This can
+     * only occur if timeouts are enabled - see the device attribute
+     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
+
+    /**
+     * This error indicates a kernel launch that uses an incompatible texturing
+     * mode.
+     */
+    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
+
+    /**
+     * This error indicates that a call to ::cuCtxEnablePeerAccess() is
+     * trying to re-enable peer access to a context which has already
+     * had peer access to it enabled.
+     */
+    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
+
+    /**
+     * This error indicates that ::cuCtxDisablePeerAccess() is
+     * trying to disable peer access which has not been enabled yet
+     * via ::cuCtxEnablePeerAccess().
+     */
+    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
+
+    /**
+     * This error indicates that the primary context for the specified device
+     * has already been initialized.
+     */
+    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
+
+    /**
+     * This error indicates that the context current to the calling thread
+     * has been destroyed using ::cuCtxDestroy, or is a primary context which
+     * has not yet been initialized.
+     */
+    CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709,
+
+    /**
+     * A device-side assert triggered during kernel execution. The context
+     * cannot be used anymore, and must be destroyed. All existing device
+     * memory allocations from this context are invalid and must be
+     * reconstructed if the program is to continue using CUDA.
+     */
+    CUDA_ERROR_ASSERT                         = 710,
+
+    /**
+     * This error indicates that the hardware resources required to enable
+     * peer access have been exhausted for one or more of the devices
+     * passed to ::cuCtxEnablePeerAccess().
+     */
+    CUDA_ERROR_TOO_MANY_PEERS                 = 711,
+
+    /**
+     * This error indicates that the memory range passed to ::cuMemHostRegister()
+     * has already been registered.
+     */
+    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+
+    /**
+     * This error indicates that the pointer passed to ::cuMemHostUnregister()
+     * does not correspond to any currently registered memory region.
+     */
+    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
+
+    /**
+     * While executing a kernel, the device encountered a stack error.
+     * This can be due to stack corruption or exceeding the stack size limit.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_HARDWARE_STACK_ERROR           = 714,
+
+    /**
+     * While executing a kernel, the device encountered an illegal instruction.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715,
+
+    /**
+     * While executing a kernel, the device encountered a load or store instruction
+     * on a memory address which is not aligned.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_MISALIGNED_ADDRESS             = 716,
+
+    /**
+     * While executing a kernel, the device encountered an instruction
+     * which can only operate on memory locations in certain address spaces
+     * (global, shared, or local), but was supplied a memory address not
+     * belonging to an allowed address space.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717,
+
+    /**
+     * While executing a kernel, the device program counter wrapped its address space.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_INVALID_PC                     = 718,
+
+    /**
+     * An exception occurred on the device while executing a kernel. Common
+     * causes include dereferencing an invalid device pointer and accessing
+     * out of bounds shared memory. Less common cases can be system specific - more
+     * information about these cases can be found in the system specific user guide.
+     * This leaves the process in an inconsistent state and any further CUDA work
+     * will return the same error. To continue using CUDA, the process must be terminated
+     * and relaunched.
+     */
+    CUDA_ERROR_LAUNCH_FAILED                  = 719,
+
+    /**
+     * This error indicates that the number of blocks launched per grid for a kernel that was
+     * launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice
+     * exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor
+     * or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
+     * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
+     */
+    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
+
+    /**
+     * This error indicates that the attempted operation is not permitted.
+     */
+    CUDA_ERROR_NOT_PERMITTED                  = 800,
+
+    /**
+     * This error indicates that the attempted operation is not supported
+     * on the current system or device.
+     */
+    CUDA_ERROR_NOT_SUPPORTED                  = 801,
+
+    /**
+     * This error indicates that the system is not yet ready to start any CUDA
+     * work.  To continue using CUDA, verify the system configuration is in a
+     * valid state and all required driver daemons are actively running.
+     * More information about this error can be found in the system specific
+     * user guide.
+     */
+    CUDA_ERROR_SYSTEM_NOT_READY               = 802,
+
+    /**
+     * This error indicates that there is a mismatch between the versions of
+     * the display driver and the CUDA driver. Refer to the compatibility documentation
+     * for supported versions.
+     */
+    CUDA_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
+
+    /**
+     * This error indicates that the system was upgraded to run with forward compatibility
+     * but the visible hardware detected by CUDA does not support this configuration.
+     * Refer to the compatibility documentation for the supported hardware matrix or ensure
+     * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES
+     * environment variable.
+     */
+    CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
+
+    /**
+     * This error indicates that the operation is not permitted when
+     * the stream is capturing.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED     = 900,
+
+    /**
+     * This error indicates that the current capture sequence on the stream
+     * has been invalidated due to a previous error.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_INVALIDATED     = 901,
+
+    /**
+     * This error indicates that the operation would have resulted in a merge
+     * of two independent capture sequences.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_MERGE           = 902,
+
+    /**
+     * This error indicates that the capture was not initiated in this stream.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNMATCHED       = 903,
+
+    /**
+     * This error indicates that the capture sequence contains a fork that was
+     * not joined to the primary stream.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_UNJOINED        = 904,
+
+    /**
+     * This error indicates that a dependency would have been created which
+     * crosses the capture sequence boundary. Only implicit in-stream ordering
+     * dependencies are allowed to cross the boundary.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_ISOLATION       = 905,
+
+    /**
+     * This error indicates a disallowed implicit dependency on a current capture
+     * sequence from cudaStreamLegacy.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_IMPLICIT        = 906,
+
+    /**
+     * This error indicates that the operation is not permitted on an event which
+     * was last recorded in a capturing stream.
+     */
+    CUDA_ERROR_CAPTURED_EVENT                 = 907,
+
+    /**
+     * A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED
+     * argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a
+     * different thread.
+     */
+    CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD    = 908,
+
+    /**
+     * This indicates that an unknown internal error has occurred.
+     */
+    CUDA_ERROR_UNKNOWN                        = 999
+} CUresult;
+
+/**
+ * Online compiler and linker options
+ */
+typedef enum CUjit_option_enum
+{
+    /**
+     * Max number of registers that a thread may use.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_MAX_REGISTERS = 0,
+
+    /**
+     * IN: Specifies minimum number of threads per block to target compilation
+     * for\n
+     * OUT: Returns the number of threads the compiler actually targeted.
+     * This restricts the resource utilization of the compiler (e.g. max
+     * registers) such that a block with the given number of threads should be
+     * able to launch based on register limitations. Note, this option does not
+     * currently take into account any other resource limitations, such as
+     * shared memory utilization.\n
+     * Cannot be combined with ::CU_JIT_TARGET.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_THREADS_PER_BLOCK,
+
+    /**
+     * Overwrites the option value with the total wall clock time, in
+     * milliseconds, spent in the compiler and linker\n
+     * Option type: float\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_WALL_TIME,
+
+    /**
+     * Pointer to a buffer in which to print any log messages
+     * that are informational in nature (the buffer size is specified via
+     * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_INFO_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Pointer to a buffer in which to print any log messages that
+     * reflect errors (the buffer size is specified via option
+     * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
+     * Option type: char *\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_ERROR_LOG_BUFFER,
+
+    /**
+     * IN: Log buffer size in bytes.  Log messages will be capped at this size
+     * (including null terminator)\n
+     * OUT: Amount of log buffer filled with messages\n
+     * Option type: unsigned int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+
+    /**
+     * Level of optimizations to apply to generated code (0 - 4), with 4
+     * being the default and highest level of optimizations.\n
+     * Option type: unsigned int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_OPTIMIZATION_LEVEL,
+
+    /**
+     * No option value required. Determines the target based on the current
+     * attached context (default)\n
+     * Option type: No option value needed\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_TARGET_FROM_CUCONTEXT,
+
+    /**
+     * Target is chosen based on supplied ::CUjit_target.  Cannot be
+     * combined with ::CU_JIT_THREADS_PER_BLOCK.\n
+     * Option type: unsigned int for enumerated type ::CUjit_target\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_TARGET,
+
+    /**
+     * Specifies choice of fallback strategy if matching cubin is not found.
+     * Choice is based on supplied ::CUjit_fallback.  This option cannot be
+     * used with cuLink* APIs as the linker requires exact matches.\n
+     * Option type: unsigned int for enumerated type ::CUjit_fallback\n
+     * Applies to: compiler only
+     */
+    CU_JIT_FALLBACK_STRATEGY,
+
+    /**
+     * Specifies whether to create debug information in output (-g)
+     * (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_GENERATE_DEBUG_INFO,
+
+    /**
+     * Generate verbose log messages (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler and linker
+     */
+    CU_JIT_LOG_VERBOSE,
+
+    /**
+     * Generate line number information (-lineinfo) (0: false, default)\n
+     * Option type: int\n
+     * Applies to: compiler only
+     */
+    CU_JIT_GENERATE_LINE_INFO,
+
+    /**
+     * Specifies whether to enable caching explicitly (-dlcm) \n
+     * Choice is based on supplied ::CUjit_cacheMode_enum.\n
+     * Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum\n
+     * Applies to: compiler only
+     */
+    CU_JIT_CACHE_MODE,
+
+    /**
+     * The below jit options are used for internal purposes only, in this version of CUDA
+     */
+    CU_JIT_NEW_SM3X_OPT,
+    CU_JIT_FAST_COMPILE,
+
+    /**
+     * Array of device symbol names that will be relocated to the corresponing
+     * host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
+     * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * When loding a device module, driver will relocate all encountered
+     * unresolved symbols to the host addresses.\n
+     * It is only allowed to register symbols that correspond to unresolved
+     * global variables.\n
+     * It is illegal to register the same device symbol at multiple addresses.\n
+     * Option type: const char **\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_NAMES,
+
+    /**
+     * Array of host addresses that will be used to relocate corresponding
+     * device symbols stored in ::CU_JIT_GLOBAL_SYMBOL_NAMES.\n
+     * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+     * Option type: void **\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
+
+    /**
+     * Number of entries in ::CU_JIT_GLOBAL_SYMBOL_NAMES and
+     * ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
+     * Option type: unsigned int\n
+     * Applies to: dynamic linker only
+     */
+    CU_JIT_GLOBAL_SYMBOL_COUNT,
+
+    CU_JIT_NUM_OPTIONS
+
+} CUjit_option;
+
+/**
+ * Device properties
+ */
+typedef enum CUdevice_attribute_enum {
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,              /**< Maximum number of threads per block */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                    /**< Maximum block dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                    /**< Maximum block dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                    /**< Maximum block dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                     /**< Maximum grid dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                     /**< Maximum grid dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                     /**< Maximum grid dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,        /**< Maximum shared memory available per block in bytes */
+    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,            /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,              /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
+    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                         /**< Warp size in threads */
+    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                         /**< Maximum pitch in bytes allowed by memory copies */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,           /**< Maximum number of 32-bit registers available per block */
+    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,               /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                        /**< Typical clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                 /**< Alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                       /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
+    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,              /**< Number of multiprocessors on device */
+    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,               /**< Specifies whether there is a run time limit on kernels */
+    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                        /**< Device is integrated with host memory */
+    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,               /**< Device can map host memory into CUDA address space */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                      /**< Compute mode (See ::CUcomputemode for details) */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,           /**< Maximum 1D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,           /**< Maximum 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,          /**< Maximum 2D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,           /**< Maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,          /**< Maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,           /**< Maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,   /**< Maximum 2D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,  /**< Maximum 2D layered texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,  /**< Maximum layers in a 2D layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
+    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                 /**< Alignment requirement for surfaces */
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                /**< Device can possibly execute multiple kernels concurrently */
+    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                       /**< Device has ECC support enabled */
+    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                        /**< PCI bus ID of the device */
+    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,                     /**< PCI device ID of the device */
+    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,                        /**< Device is using TCC driver model */
+    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,                 /**< Peak memory clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,           /**< Global memory bus width in bits */
+    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,                     /**< Size of L2 cache in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,    /**< Maximum resident threads per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,                /**< Number of asynchronous engines */
+    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,                /**< Device shares a unified address space with the host */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,   /**< Maximum 1D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,  /**< Maximum layers in a 1D layered texture */
+    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,                  /**< Deprecated, do not use. */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,    /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,   /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,/**< Alternate maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,                     /**< PCI domain ID of the device */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,           /**< Pitch alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,      /**< Maximum cubemap texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,  /**< Maximum cubemap layered texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,           /**< Maximum 1D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,           /**< Maximum 2D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,          /**< Maximum 2D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,           /**< Maximum 3D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,          /**< Maximum 3D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,           /**< Maximum 3D surface depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,   /**< Maximum 1D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,  /**< Maximum layers in a 1D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,   /**< Maximum 2D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,  /**< Maximum 2D layered surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,  /**< Maximum layers in a 2D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,      /**< Maximum cubemap surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,  /**< Maximum cubemap layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,    /**< Maximum 1D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,    /**< Maximum 2D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,   /**< Maximum 2D linear texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,    /**< Maximum 2D linear texture pitch in bytes */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,/**< Maximum mipmapped 2D texture height */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,          /**< Major compute capability version number */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,          /**< Minor compute capability version number */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */
+    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,       /**< Device supports stream priorities */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,         /**< Device supports caching globals in L1 */
+    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,          /**< Device supports caching locals in L1 */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,  /**< Maximum shared memory available per multiprocessor in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,  /**< Maximum number of 32-bit registers available per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,                    /**< Device can allocate managed memory on this system */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,                    /**< Device is on a multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,           /**< Unique id for a group of devices on the same multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,       /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/
+    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,  /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
+    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,            /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,         /**< Device can coherently access managed memory concurrently with the CPU */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,      /**< Device supports compute preemption. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, /**< Device can access host registered memory at the same virtual address as the CPU */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,            /**< ::cuStreamBatchMemOp and related APIs are supported. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,     /**< 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. */
+    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,     /**< ::CU_STREAM_WAIT_VALUE_NOR is supported. */
+    CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,                /**< Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel */
+    CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,   /**< Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, /**< Maximum optin shared memory per block */
+    CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,           /**< Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. */
+    CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,           /**< Device supports host memory registration via ::cudaHostRegister. */
+    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, /**< Device accesses pageable memory via the host's page tables. */
+    CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, /**< The host can directly access managed memory on the device without migration. */
+    CU_DEVICE_ATTRIBUTE_MAX /**< One past the highest attribute value listed above (evaluates to 102); must stay the last entry */
+} CUdevice_attribute;
+
+/**
+ * Function cache configurations: preferred sizing of L1 cache versus shared memory
+ */
+typedef enum CUfunc_cache_enum {
+    CU_FUNC_CACHE_PREFER_NONE    = 0x00, /**< no preference for shared memory or L1 (default) */
+    CU_FUNC_CACHE_PREFER_SHARED  = 0x01, /**< prefer larger shared memory and smaller L1 cache */
+    CU_FUNC_CACHE_PREFER_L1      = 0x02, /**< prefer larger L1 cache and smaller shared memory */
+    CU_FUNC_CACHE_PREFER_EQUAL   = 0x03  /**< prefer equal sized L1 cache and shared memory */
+} CUfunc_cache;
+
+/**
+ * Shared memory configurations: selects the shared memory bank width (default, four or eight bytes)
+ */
+typedef enum CUsharedconfig_enum {
+    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0x00, /**< set default shared memory bank size */
+    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 0x01, /**< set shared memory bank width to four bytes */
+    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02  /**< set shared memory bank width to eight bytes */
+} CUsharedconfig;
+
+/**
+ * Function properties
+ */
+typedef enum CUfunction_attribute_enum {
+    /**
+     * The maximum number of threads per block, beyond which a launch of the
+     * function would fail. This number depends on both the function and the
+     * device on which the function is currently loaded.
+     */
+    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+
+    /**
+     * The size in bytes of statically-allocated shared memory required by
+     * this function. This does not include dynamically-allocated shared
+     * memory requested by the user at runtime.
+     */
+    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+
+    /**
+     * The size in bytes of user-allocated constant memory required by this
+     * function.
+     */
+    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+
+    /**
+     * The size in bytes of local memory used by each thread of this function.
+     */
+    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+
+    /**
+     * The number of registers used by each thread of this function.
+     */
+    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+
+    /**
+     * The PTX virtual architecture version for which the function was
+     * compiled. This value is the major PTX version * 10 + the minor PTX
+     * version, so a PTX version 1.3 function would return the value 13.
+     * Note that this may return the undefined value of 0 for cubins
+     * compiled prior to CUDA 3.0.
+     */
+    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+
+    /**
+     * The binary architecture version for which the function was compiled.
+     * This value is the major binary version * 10 + the minor binary version,
+     * so a binary version 1.3 function would return the value 13. Note that
+     * this will return a value of 10 for legacy cubins that do not have a
+     * properly-encoded binary architecture version.
+     */
+    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+
+    /**
+     * The attribute to indicate whether the function has been compiled with
+     * user specified option "-Xptxas --dlcm=ca" set.
+     */
+    CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+
+    /**
+     * The maximum size in bytes of dynamically-allocated shared memory that can be used by
+     * this function. If the user-specified dynamic shared memory size is larger than this
+     * value, the launch will fail.
+     * See ::cuFuncSetAttribute
+     */
+    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
+
+    /**
+     * On devices where the L1 cache and shared memory use the same hardware resources,
+     * this sets the shared memory carveout preference, in percent of the total shared memory.
+     * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
+     * This is only a hint, and the driver can choose a different ratio if required to execute the function.
+     * See ::cuFuncSetAttribute
+     */
+    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
+
+    CU_FUNC_ATTRIBUTE_MAX /**< Count of the attributes above (evaluates to 10); must stay the last entry */
+} CUfunction_attribute;
+
+#if defined (_WIN32) || defined (__CYGWIN__) || defined (__MSYS__) /* Windows targets; Cygwin/MSYS2 toolchains do not define _WIN32 */
+#define CUDAAPI __stdcall /* calling convention the Windows CUDA driver exports use */
+#else
+#define CUDAAPI           /* no calling-convention decoration on other platforms */
+#endif
+
+#define CUDA_API_CALL CUDAAPI /* spelled into every CUDA_CU* function-pointer typedef */
+
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXCREATE)              (CUcontext *, unsigned int, CUdevice); // one pointer type per CUDA driver API entry point
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXDESTROY)             (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETCACHECONFIG)      (CUfunc_cache *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETCURRENT)          (CUcontext *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXGETSHAREDMEMCONFIG)  (CUsharedconfig *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXPOPCURRENT)          (CUcontext *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXPUSHCURRENT)         (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETCACHECONFIG)      (CUfunc_cache);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETCURRENT)          (CUcontext);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSETSHAREDMEMCONFIG)  (CUsharedconfig);
+typedef CUresult (CUDA_API_CALL *CUDA_CUCTXSYNCHRONIZE)         (void); // (void), not (): an empty list leaves the parameters unspecified in C
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETATTRIBUTE)     (int *, CUdevice_attribute, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETCOUNT)         (int *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGET)              (CUdevice *, int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICEGETNAME)          (char *, int, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDEVICETOTALMEM)         (size_t *, CUdevice);
+typedef CUresult (CUDA_API_CALL *CUDA_CUDRIVERGETVERSION)       (int *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTCREATE)            (CUevent *, unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTDESTROY)           (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTELAPSEDTIME)       (float *, CUevent, CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTQUERY)             (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTRECORD)            (CUevent, CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUEVENTSYNCHRONIZE)       (CUevent);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCGETATTRIBUTE)       (int *, CUfunction_attribute, CUfunction);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETATTRIBUTE)       (CUfunction, CUfunction_attribute, int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETCACHECONFIG)     (CUfunction, CUfunc_cache);
+typedef CUresult (CUDA_API_CALL *CUDA_CUFUNCSETSHAREDMEMCONFIG) (CUfunction, CUsharedconfig);
+typedef CUresult (CUDA_API_CALL *CUDA_CUGETERRORNAME)           (CUresult, const char **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUGETERRORSTRING)         (CUresult, const char **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUINIT)                   (unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CULAUNCHKERNEL)           (CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOC)               (CUdeviceptr *, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOCHOST)           (void **, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOD)             (CUdeviceptr, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOH)             (void *, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTOD)             (CUdeviceptr, const void *, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE)                (CUdeviceptr);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST)            (void *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO)             (size_t *, size_t *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32)              (CUdeviceptr, unsigned int, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8)               (CUdeviceptr, unsigned char, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION)      (CUfunction *, CUmodule, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL)        (CUdeviceptr *, size_t *, CUmodule, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD)             (CUmodule *, const char *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOADDATA)         (CUmodule *, const void *);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOADDATAEX)       (CUmodule *, const void *, unsigned int, CUjit_option *, void **);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEUNLOAD)           (CUmodule);
+typedef CUresult (CUDA_API_CALL *CUDA_CUPROFILERSTART)          (void); // takes no arguments
+typedef CUresult (CUDA_API_CALL *CUDA_CUPROFILERSTOP)           (void); // takes no arguments
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMCREATE)           (CUstream *, unsigned int);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMDESTROY)          (CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMSYNCHRONIZE)      (CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMWAITEVENT)        (CUstream, CUevent, unsigned int);
+
+typedef struct hc_cuda_lib // table of CUDA driver API entry points: one slot per CUDA_CU* pointer typedef
+{
+  hc_dynlib_t lib; // library handle (presumably the loaded CUDA driver library - confirm in the loader)
+  CUDA_CUCTXCREATE              cuCtxCreate;
+  CUDA_CUCTXDESTROY             cuCtxDestroy;
+  CUDA_CUCTXGETCACHECONFIG      cuCtxGetCacheConfig;
+  CUDA_CUCTXGETCURRENT          cuCtxGetCurrent;
+  CUDA_CUCTXGETSHAREDMEMCONFIG  cuCtxGetSharedMemConfig;
+  CUDA_CUCTXPOPCURRENT          cuCtxPopCurrent;
+  CUDA_CUCTXPUSHCURRENT         cuCtxPushCurrent;
+  CUDA_CUCTXSETCACHECONFIG      cuCtxSetCacheConfig;
+  CUDA_CUCTXSETCURRENT          cuCtxSetCurrent;
+  CUDA_CUCTXSETSHAREDMEMCONFIG  cuCtxSetSharedMemConfig;
+  CUDA_CUCTXSYNCHRONIZE         cuCtxSynchronize;
+  CUDA_CUDEVICEGETATTRIBUTE     cuDeviceGetAttribute;
+  CUDA_CUDEVICEGETCOUNT         cuDeviceGetCount;
+  CUDA_CUDEVICEGET              cuDeviceGet;
+  CUDA_CUDEVICEGETNAME          cuDeviceGetName;
+  CUDA_CUDEVICETOTALMEM         cuDeviceTotalMem;
+  CUDA_CUDRIVERGETVERSION       cuDriverGetVersion;
+  CUDA_CUEVENTCREATE            cuEventCreate;
+  CUDA_CUEVENTDESTROY           cuEventDestroy;
+  CUDA_CUEVENTELAPSEDTIME       cuEventElapsedTime;
+  CUDA_CUEVENTQUERY             cuEventQuery;
+  CUDA_CUEVENTRECORD            cuEventRecord;
+  CUDA_CUEVENTSYNCHRONIZE       cuEventSynchronize;
+  CUDA_CUFUNCGETATTRIBUTE       cuFuncGetAttribute;
+  CUDA_CUFUNCSETATTRIBUTE       cuFuncSetAttribute;
+  CUDA_CUFUNCSETCACHECONFIG     cuFuncSetCacheConfig;
+  CUDA_CUFUNCSETSHAREDMEMCONFIG cuFuncSetSharedMemConfig;
+  CUDA_CUGETERRORNAME           cuGetErrorName;
+  CUDA_CUGETERRORSTRING         cuGetErrorString;
+  CUDA_CUINIT                   cuInit;
+  CUDA_CULAUNCHKERNEL           cuLaunchKernel;
+  CUDA_CUMEMALLOC               cuMemAlloc;
+  CUDA_CUMEMALLOCHOST           cuMemAllocHost;
+  CUDA_CUMEMCPYDTOD             cuMemcpyDtoD;
+  CUDA_CUMEMCPYDTOH             cuMemcpyDtoH;
+  CUDA_CUMEMCPYHTOD             cuMemcpyHtoD;
+  CUDA_CUMEMFREE                cuMemFree;
+  CUDA_CUMEMFREEHOST            cuMemFreeHost;
+  CUDA_CUMEMGETINFO             cuMemGetInfo;
+  CUDA_CUMEMSETD32              cuMemsetD32;
+  CUDA_CUMEMSETD8               cuMemsetD8;
+  CUDA_CUMODULEGETFUNCTION      cuModuleGetFunction;
+  CUDA_CUMODULEGETGLOBAL        cuModuleGetGlobal;
+  CUDA_CUMODULELOAD             cuModuleLoad;
+  CUDA_CUMODULELOADDATA         cuModuleLoadData;
+  CUDA_CUMODULELOADDATAEX       cuModuleLoadDataEx;
+  CUDA_CUMODULEUNLOAD           cuModuleUnload;
+  CUDA_CUPROFILERSTART          cuProfilerStart;
+  CUDA_CUPROFILERSTOP           cuProfilerStop;
+  CUDA_CUSTREAMCREATE           cuStreamCreate;
+  CUDA_CUSTREAMDESTROY          cuStreamDestroy;
+  CUDA_CUSTREAMSYNCHRONIZE      cuStreamSynchronize;
+  CUDA_CUSTREAMWAITEVENT        cuStreamWaitEvent;
+
+} hc_cuda_lib_t;
+
+typedef hc_cuda_lib_t CUDA_PTR; // short alias for the entry-point table type
+
+#endif // _EXT_CUDA_H
diff --git a/include/types.h b/include/types.h
index 7c1960147..f35eca3f7 100644
--- a/include/types.h
+++ b/include/types.h
@@ -989,6 +989,7 @@ typedef struct link_speed
 
 } link_speed_t;
 
+#include "ext_cuda.h"
 #include "ext_OpenCL.h"
 
 typedef struct hc_device_param
@@ -1328,11 +1329,12 @@ typedef struct hc_device_param
 
 } hc_device_param_t;
 
-typedef struct opencl_ctx
+typedef struct backend_ctx
 {
   bool                enabled;
 
   void               *ocl;
+  void               *cuda;
 
   cl_uint             platforms_cnt;
   cl_platform_id     *platforms;
@@ -1369,7 +1371,7 @@ typedef struct opencl_ctx
 
   int                 force_jit_compilation;
 
-} opencl_ctx_t;
+} backend_ctx_t;
 
 typedef enum kernel_workload
 {
@@ -2299,7 +2301,7 @@ typedef struct hashcat_ctx
   loopback_ctx_t        *loopback_ctx;
   mask_ctx_t            *mask_ctx;
   module_ctx_t          *module_ctx;
-  opencl_ctx_t          *opencl_ctx;
+  backend_ctx_t         *backend_ctx;
   outcheck_ctx_t        *outcheck_ctx;
   outfile_ctx_t         *outfile_ctx;
   pidfile_ctx_t         *pidfile_ctx;
diff --git a/src/Makefile b/src/Makefile
index c2a3c2f8a..196ad6c89 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -274,13 +274,13 @@ endif # MSYS2
 ## Objects
 ##
 
-EMU_OBJS_ALL            := emu_general emu_inc_common emu_inc_scalar emu_inc_simd
+EMU_OBJS_ALL            := emu_general emu_inc_common emu_inc_platform emu_inc_scalar emu_inc_simd
 EMU_OBJS_ALL            += emu_inc_rp emu_inc_rp_optimized
 EMU_OBJS_ALL            += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu_inc_truecrypt_xts emu_inc_veracrypt_xts
 EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512
 EMU_OBJS_ALL            += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
 
-OBJS_ALL                := affinity autotune benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_nvapi ext_nvml ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp opencl outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
+OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL                += brain
diff --git a/src/autotune.c b/src/autotune.c
index de54fd063..ab383e63f 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -6,7 +6,7 @@
 #include "common.h"
 #include "types.h"
 #include "event.h"
-#include "opencl.h"
+#include "backend.h"
 #include "status.h"
 #include "autotune.h"
 
@@ -50,11 +50,11 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
   const hashconfig_t    *hashconfig   = hashcat_ctx->hashconfig;
-  const opencl_ctx_t    *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t   *backend_ctx  = hashcat_ctx->backend_ctx;
   const straight_ctx_t  *straight_ctx = hashcat_ctx->straight_ctx;
   const user_options_t  *user_options = hashcat_ctx->user_options;
 
-  const double target_msec = opencl_ctx->target_msec;
+  const double target_msec = backend_ctx->target_msec;
 
   const u32 kernel_accel_min = device_param->kernel_accel_min;
   const u32 kernel_accel_max = device_param->kernel_accel_max;
@@ -283,11 +283,11 @@ HC_API_CALL void *thread_autotune (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped == true) return NULL;
 
diff --git a/src/opencl.c b/src/backend.c
similarity index 92%
rename from src/opencl.c
rename to src/backend.c
index 4086a00f6..41cf2b645 100644
--- a/src/opencl.c
+++ b/src/backend.c
@@ -22,7 +22,7 @@
 #include "emu_inc_hash_md5.h"
 #include "event.h"
 #include "dynloader.h"
-#include "opencl.h"
+#include "backend.h"
 
 #if defined (__linux__)
 static const char *dri_card0_path = "/dev/dri/card0";
@@ -340,9 +340,9 @@ static bool test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl
 
   if (CL_rc == -1) return false;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   // LLVM seems to write an error message (if there's an error) directly to stderr
   // and not (as supposted to) into buffer for later request using clGetProgramBuildInfo()
@@ -543,11 +543,120 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_d
   snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", profile_dir, attack_kern, device_name_chksum_amp_mp);
 }
 
+int cuda_init (hashcat_ctx_t *hashcat_ctx) // opens the CUDA driver library and fills backend_ctx->cuda with its entry points; returns 0 on success, -1 if the library cannot be opened
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda; // must already be allocated by the caller; it is written to unconditionally below
+
+  memset (cuda, 0, sizeof (CUDA_PTR)); // start from an all-zero table so cuda->lib stays NULL until an open succeeds
+
+  #if   defined (_WIN)
+  cuda->lib = hc_dlopen ("nvcuda.dll"); // the NVIDIA driver ships the CUDA driver API as nvcuda.dll; a library named "cuda" is not part of a driver install
+  #elif defined (__APPLE__)
+  cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA"); // NOTE(review): path not verified against a real macOS CUDA install - confirm
+  #elif defined (__CYGWIN__)
+  cuda->lib = hc_dlopen ("nvcuda.dll"); // same driver DLL as native Windows
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("cygcuda-1.dll"); // second attempt, kept from the original lookup order
+  #else
+  cuda->lib = hc_dlopen ("libcuda.so");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1"); // second attempt: versioned soname
+  #endif
+
+  if (cuda->lib == NULL)
+  {
+    event_log_error (hashcat_ctx, "Cannot find CUDA library.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
+    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, NULL);
+
+    return -1; // nothing was opened; the table is left zeroed
+  }
+
+  HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1); // NOTE(review): cuda.h maps many of these names to *_v2 exports (cuCtxCreate_v2, cuMemAlloc_v2, ...); these lookups use the unversioned names - confirm the CUDA_* typedefs match that ABI
+  HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1); // NOTE(review): the trailing 1 presumably makes a missing symbol fatal - confirm against the HC_LOAD_FUNC definition
+  HC_LOAD_FUNC (cuda, cuCtxGetCacheConfig,      CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetCurrent,          CUDA_CUCTXGETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetAttribute,     CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetCount,         CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGet,              CUDA_CUDEVICEGET,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetName,          CUDA_CUDEVICEGETNAME,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceTotalMem,         CUDA_CUDEVICETOTALMEM,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDriverGetVersion,       CUDA_CUDRIVERGETVERSION,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventCreate,            CUDA_CUEVENTCREATE,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventDestroy,           CUDA_CUEVENTDESTROY,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventElapsedTime,       CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventQuery,             CUDA_CUEVENTQUERY,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventRecord,            CUDA_CUEVENTRECORD,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventSynchronize,       CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncGetAttribute,       CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetAttribute,       CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetCacheConfig,     CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorName,           CUDA_CUGETERRORNAME,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorString,         CUDA_CUGETERRORSTRING,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuInit,                   CUDA_CUINIT,                    CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuLaunchKernel,           CUDA_CULAUNCHKERNEL,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAlloc,               CUDA_CUMEMALLOC,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAllocHost,           CUDA_CUMEMALLOCHOST,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoD,             CUDA_CUMEMCPYDTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoH,             CUDA_CUMEMCPYDTOH,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyHtoD,             CUDA_CUMEMCPYHTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFree,                CUDA_CUMEMFREE,                 CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFreeHost,            CUDA_CUMEMFREEHOST,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemGetInfo,             CUDA_CUMEMGETINFO,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD32,              CUDA_CUMEMSETD32,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD8,               CUDA_CUMEMSETD8,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetFunction,      CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetGlobal,        CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoad,             CUDA_CUMODULELOAD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadData,         CUDA_CUMODULELOADDATA,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadDataEx,       CUDA_CUMODULELOADDATAEX,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleUnload,           CUDA_CUMODULEUNLOAD,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStart,          CUDA_CUPROFILERSTART,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStop,           CUDA_CUPROFILERSTOP,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamCreate,           CUDA_CUSTREAMCREATE,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamDestroy,          CUDA_CUSTREAMDESTROY,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamSynchronize,      CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamWaitEvent,        CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
+
+  return 0; // library opened and every lookup above completed
+}
+
+void cuda_close (hashcat_ctx_t *hashcat_ctx)
+{
+  // Close the CUDA library handle (if one was opened), free the CUDA_PTR
+  // table and clear the pointer kept in the backend context.
+  backend_ctx_t *ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *handle = ctx->cuda;
+
+  if (handle == NULL) return; // no table present, nothing to release
+
+  // the table may exist without an open library
+  if (handle->lib != NULL) hc_dlclose (handle->lib);
+
+  // handle aliases ctx->cuda, so this releases the table itself
+  hcfree (ctx->cuda);
+
+  ctx->cuda = NULL;
+}
+
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   memset (ocl, 0, sizeof (OCL_PTR));
 
@@ -598,47 +707,47 @@ int ocl_init (hashcat_ctx_t *hashcat_ctx)
     return -1;
   }
 
-  HC_LOAD_FUNC(ocl, clBuildProgram, OCL_CLBUILDPROGRAM, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateBuffer, OCL_CLCREATEBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateCommandQueue, OCL_CLCREATECOMMANDQUEUE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateContext, OCL_CLCREATECONTEXT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateKernel, OCL_CLCREATEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueCopyBuffer, OCL_CLENQUEUECOPYBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueMapBuffer, OCL_CLENQUEUEMAPBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueNDRangeKernel, OCL_CLENQUEUENDRANGEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueReadBuffer, OCL_CLENQUEUEREADBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueUnmapMemObject, OCL_CLENQUEUEUNMAPMEMOBJECT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clEnqueueWriteBuffer, OCL_CLENQUEUEWRITEBUFFER, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clFinish, OCL_CLFINISH, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clFlush, OCL_CLFLUSH, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetDeviceIDs, OCL_CLGETDEVICEIDS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetDeviceInfo, OCL_CLGETDEVICEINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetEventInfo, OCL_CLGETEVENTINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetKernelWorkGroupInfo, OCL_CLGETKERNELWORKGROUPINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetPlatformIDs, OCL_CLGETPLATFORMIDS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetPlatformInfo, OCL_CLGETPLATFORMINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetProgramBuildInfo, OCL_CLGETPROGRAMBUILDINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetProgramInfo, OCL_CLGETPROGRAMINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseCommandQueue, OCL_CLRELEASECOMMANDQUEUE, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseContext, OCL_CLRELEASECONTEXT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseKernel, OCL_CLRELEASEKERNEL, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseMemObject, OCL_CLRELEASEMEMOBJECT, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseProgram, OCL_CLRELEASEPROGRAM, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clSetKernelArg, OCL_CLSETKERNELARG, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clWaitForEvents, OCL_CLWAITFOREVENTS, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clGetEventProfilingInfo, OCL_CLGETEVENTPROFILINGINFO, OpenCL, 1)
-  HC_LOAD_FUNC(ocl, clReleaseEvent, OCL_CLRELEASEEVENT, OpenCL, 1)
+  HC_LOAD_FUNC (ocl, clBuildProgram,            OCL_CLBUILDPROGRAM,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateBuffer,            OCL_CLCREATEBUFFER,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateCommandQueue,      OCL_CLCREATECOMMANDQUEUE,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateContext,           OCL_CLCREATECONTEXT,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateKernel,            OCL_CLCREATEKERNEL,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueCopyBuffer,       OCL_CLENQUEUECOPYBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueMapBuffer,        OCL_CLENQUEUEMAPBUFFER,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueNDRangeKernel,    OCL_CLENQUEUENDRANGEKERNEL,     OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueReadBuffer,       OCL_CLENQUEUEREADBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueUnmapMemObject,   OCL_CLENQUEUEUNMAPMEMOBJECT,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueWriteBuffer,      OCL_CLENQUEUEWRITEBUFFER,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFinish,                  OCL_CLFINISH,                   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFlush,                   OCL_CLFLUSH,                    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceIDs,            OCL_CLGETDEVICEIDS,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceInfo,           OCL_CLGETDEVICEINFO,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventInfo,            OCL_CLGETEVENTINFO,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetKernelWorkGroupInfo,  OCL_CLGETKERNELWORKGROUPINFO,   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformIDs,          OCL_CLGETPLATFORMIDS,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformInfo,         OCL_CLGETPLATFORMINFO,          OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramBuildInfo,     OCL_CLGETPROGRAMBUILDINFO,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramInfo,          OCL_CLGETPROGRAMINFO,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseCommandQueue,     OCL_CLRELEASECOMMANDQUEUE,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseContext,          OCL_CLRELEASECONTEXT,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseKernel,           OCL_CLRELEASEKERNEL,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseMemObject,        OCL_CLRELEASEMEMOBJECT,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseProgram,          OCL_CLRELEASEPROGRAM,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clSetKernelArg,            OCL_CLSETKERNELARG,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clWaitForEvents,           OCL_CLWAITFOREVENTS,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventProfilingInfo,   OCL_CLGETEVENTPROFILINGINFO,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseEvent,            OCL_CLRELEASEEVENT,             OpenCL, 1);
 
   return 0;
 }
 
 void ocl_close (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   if (ocl)
   {
@@ -647,15 +756,17 @@ void ocl_close (hashcat_ctx_t *hashcat_ctx)
       hc_dlclose (ocl->lib);
     }
 
-    hcfree (opencl_ctx->ocl);
+    hcfree (backend_ctx->ocl);
+
+    backend_ctx->ocl = NULL;
   }
 }
 
 int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueNDRangeKernel (command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event);
 
@@ -671,9 +782,9 @@ int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue comm
 
 int hc_clGetEventInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetEventInfo (event, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -689,9 +800,9 @@ int hc_clGetEventInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_event_info
 
 int hc_clFlush (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clFlush (command_queue);
 
@@ -707,9 +818,9 @@ int hc_clFlush (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 
 int hc_clFinish (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clFinish (command_queue);
 
@@ -725,9 +836,9 @@ int hc_clFinish (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 
 int hc_clSetKernelArg (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clSetKernelArg (kernel, arg_index, arg_size, arg_value);
 
@@ -743,9 +854,9 @@ int hc_clSetKernelArg (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_uint arg
 
 int hc_clEnqueueWriteBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueWriteBuffer (command_queue, buffer, blocking_write, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -761,9 +872,9 @@ int hc_clEnqueueWriteBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue comman
 
 int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueCopyBuffer (command_queue, src_buffer, dst_buffer, src_offset, dst_offset, size, num_events_in_wait_list, event_wait_list, event);
 
@@ -779,9 +890,9 @@ int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command
 
 int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueReadBuffer (command_queue, buffer, blocking_read, offset, size, ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -797,9 +908,9 @@ int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command
 
 int hc_clGetPlatformIDs (hashcat_ctx_t *hashcat_ctx, cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetPlatformIDs (num_entries, platforms, num_platforms);
 
@@ -815,9 +926,9 @@ int hc_clGetPlatformIDs (hashcat_ctx_t *hashcat_ctx, cl_uint num_entries, cl_pla
 
 int hc_clGetPlatformInfo (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetPlatformInfo (platform, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -833,9 +944,9 @@ int hc_clGetPlatformInfo (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, c
 
 int hc_clGetDeviceIDs (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetDeviceIDs (platform, device_type, num_entries, devices, num_devices);
 
@@ -851,9 +962,9 @@ int hc_clGetDeviceIDs (hashcat_ctx_t *hashcat_ctx, cl_platform_id platform, cl_d
 
 int hc_clGetDeviceInfo (hashcat_ctx_t *hashcat_ctx, cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetDeviceInfo (device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -869,9 +980,9 @@ int hc_clGetDeviceInfo (hashcat_ctx_t *hashcat_ctx, cl_device_id device, cl_devi
 
 int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, const cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK *pfn_notify) (const char *errinfo, const void *private_info, size_t cb, void *user_data), void *user_data, cl_context *context)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -889,9 +1000,9 @@ int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, const cl_context_properties
 
 int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_command_queue *command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -909,9 +1020,9 @@ int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_
 
 int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -929,9 +1040,9 @@ int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_fl
 
 int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -949,9 +1060,9 @@ int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context
 
 int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_program *program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -969,9 +1080,9 @@ int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context
 
 int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clBuildProgram (program, num_devices, device_list, options, pfn_notify, user_data);
 
@@ -987,9 +1098,9 @@ int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint n
 
 int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const char *kernel_name, cl_kernel *kernel)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -1007,9 +1118,9 @@ int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const cha
 
 int hc_clReleaseMemObject (hashcat_ctx_t *hashcat_ctx, cl_mem mem)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseMemObject (mem);
 
@@ -1025,9 +1136,9 @@ int hc_clReleaseMemObject (hashcat_ctx_t *hashcat_ctx, cl_mem mem)
 
 int hc_clReleaseKernel (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseKernel (kernel);
 
@@ -1043,9 +1154,9 @@ int hc_clReleaseKernel (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel)
 
 int hc_clReleaseProgram (hashcat_ctx_t *hashcat_ctx, cl_program program)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseProgram (program);
 
@@ -1061,9 +1172,9 @@ int hc_clReleaseProgram (hashcat_ctx_t *hashcat_ctx, cl_program program)
 
 int hc_clReleaseCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseCommandQueue (command_queue);
 
@@ -1079,9 +1190,9 @@ int hc_clReleaseCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_command_queue comma
 
 int hc_clReleaseContext (hashcat_ctx_t *hashcat_ctx, cl_context context)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseContext (context);
 
@@ -1097,9 +1208,9 @@ int hc_clReleaseContext (hashcat_ctx_t *hashcat_ctx, cl_context context)
 
 int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   cl_int CL_err;
 
@@ -1117,9 +1228,9 @@ int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_
 
 int hc_clEnqueueUnmapMemObject (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clEnqueueUnmapMemObject (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event);
 
@@ -1135,9 +1246,9 @@ int hc_clEnqueueUnmapMemObject (hashcat_ctx_t *hashcat_ctx, cl_command_queue com
 
 int hc_clGetKernelWorkGroupInfo (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetKernelWorkGroupInfo (kernel, device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1153,9 +1264,9 @@ int hc_clGetKernelWorkGroupInfo (hashcat_ctx_t *hashcat_ctx, cl_kernel kernel, c
 
 int hc_clGetProgramBuildInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetProgramBuildInfo (program, device, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1171,9 +1282,9 @@ int hc_clGetProgramBuildInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl
 
 int hc_clGetProgramInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetProgramInfo (program, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1189,9 +1300,9 @@ int hc_clGetProgramInfo (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_prog
 
 int hc_clWaitForEvents (hashcat_ctx_t *hashcat_ctx, cl_uint num_events, const cl_event *event_list)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clWaitForEvents (num_events, event_list);
 
@@ -1207,9 +1318,9 @@ int hc_clWaitForEvents (hashcat_ctx_t *hashcat_ctx, cl_uint num_events, const cl
 
 int hc_clGetEventProfilingInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clGetEventProfilingInfo (event, param_name, param_value_size, param_value, param_value_size_ret);
 
@@ -1225,9 +1336,9 @@ int hc_clGetEventProfilingInfo (hashcat_ctx_t *hashcat_ctx, cl_event event, cl_p
 
 int hc_clReleaseEvent (hashcat_ctx_t *hashcat_ctx, cl_event event)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  OCL_PTR *ocl = opencl_ctx->ocl;
+  OCL_PTR *ocl = backend_ctx->ocl;
 
   const cl_int CL_err = ocl->clReleaseEvent (event);
 
@@ -2927,12 +3038,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
   return 0;
 }
 
-int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
+int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  opencl_ctx->enabled = false;
+  backend_ctx->enabled = false;
 
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
@@ -2943,7 +3054,22 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   hc_device_param_t *devices_param = (hc_device_param_t *) hccalloc (DEVICES_MAX, sizeof (hc_device_param_t));
 
-  opencl_ctx->devices_param = devices_param;
+  backend_ctx->devices_param = devices_param;
+
+  /**
+   * Load and map CUDA library calls
+   */
+
+  CUDA_PTR *cuda = (CUDA_PTR *) hcmalloc (sizeof (CUDA_PTR));
+
+  backend_ctx->cuda = cuda;
+
+  const int rc_cuda_init = cuda_init (hashcat_ctx);
+
+  if (rc_cuda_init == -1)
+  {
+    cuda_close (hashcat_ctx);
+  }
 
   /**
    * Load and map OpenCL library calls
@@ -2951,11 +3077,23 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   OCL_PTR *ocl = (OCL_PTR *) hcmalloc (sizeof (OCL_PTR));
 
-  opencl_ctx->ocl = ocl;
+  backend_ctx->ocl = ocl;
 
   const int rc_ocl_init = ocl_init (hashcat_ctx);
 
-  if (rc_ocl_init == -1) return -1;
+  if (rc_ocl_init == -1)
+  {
+    ocl_close (hashcat_ctx);
+  }
+
+  /**
+   * return if both CUDA and OpenCL initialization failed
+   */
+
+  if ((rc_cuda_init == -1) && (rc_ocl_init == -1))
+  {
+    return -1;
+  }
 
   /**
    * Some permission pre-check, because AMDGPU-PRO Driver crashes if the user has no permission to do this
@@ -2975,7 +3113,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_platforms_filter == false) return -1;
 
-  opencl_ctx->opencl_platforms_filter = opencl_platforms_filter;
+  backend_ctx->opencl_platforms_filter = opencl_platforms_filter;
 
   /**
    * OpenCL device selection
@@ -2987,7 +3125,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_devices_filter == false) return -1;
 
-  opencl_ctx->devices_filter = devices_filter;
+  backend_ctx->devices_filter = devices_filter;
 
   /**
    * OpenCL device type selection
@@ -2999,7 +3137,7 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (rc_device_types_filter == false) return -1;
 
-  opencl_ctx->device_types_filter = device_types_filter;
+  backend_ctx->device_types_filter = device_types_filter;
 
   /**
    * OpenCL platforms: detect
@@ -3139,58 +3277,61 @@ int opencl_ctx_init (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    opencl_ctx->device_types_filter = device_types_filter;
+    backend_ctx->device_types_filter = device_types_filter;
   }
 
-  opencl_ctx->enabled = true;
+  backend_ctx->enabled = true;
 
-  opencl_ctx->platforms_vendor      = platforms_vendor;
-  opencl_ctx->platforms_name        = platforms_name;
-  opencl_ctx->platforms_version     = platforms_version;
-  opencl_ctx->platforms_skipped     = platforms_skipped;
-  opencl_ctx->platforms_cnt         = platforms_cnt;
-  opencl_ctx->platforms             = platforms;
-  opencl_ctx->platform_devices_cnt  = platform_devices_cnt;
-  opencl_ctx->platform_devices      = platform_devices;
+  backend_ctx->platforms_vendor      = platforms_vendor;
+  backend_ctx->platforms_name        = platforms_name;
+  backend_ctx->platforms_version     = platforms_version;
+  backend_ctx->platforms_skipped     = platforms_skipped;
+  backend_ctx->platforms_cnt         = platforms_cnt;
+  backend_ctx->platforms             = platforms;
+  backend_ctx->platform_devices_cnt  = platform_devices_cnt;
+  backend_ctx->platform_devices      = platform_devices;
 
   return 0;
 }
 
-void opencl_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
+  // backend_ctx_init () allocates backend_ctx->cuda; release it here because the memset () below drops the only pointer to it
+  // NOTE(review): like ocl_close (), this may already have run in backend_ctx_init () after a failed init - confirm a second call is harmless
+  cuda_close (hashcat_ctx);
   ocl_close (hashcat_ctx);
 
-  hcfree (opencl_ctx->devices_param);
+  hcfree (backend_ctx->devices_param);
 
-  hcfree (opencl_ctx->platforms);
-  hcfree (opencl_ctx->platform_devices);
-  hcfree (opencl_ctx->platforms_vendor);
-  hcfree (opencl_ctx->platforms_name);
-  hcfree (opencl_ctx->platforms_version);
-  hcfree (opencl_ctx->platforms_skipped);
+  hcfree (backend_ctx->platforms);
+  hcfree (backend_ctx->platform_devices);
+  hcfree (backend_ctx->platforms_vendor);
+  hcfree (backend_ctx->platforms_name);
+  hcfree (backend_ctx->platforms_version);
+  hcfree (backend_ctx->platforms_skipped);
 
-  memset (opencl_ctx, 0, sizeof (opencl_ctx_t));
+  memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 
-int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
+int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   /**
    * OpenCL devices: simply push all devices from all platforms into the same device array
    */
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  cl_uint         platform_devices_cnt  = opencl_ctx->platform_devices_cnt;
-  cl_device_id   *platform_devices      = opencl_ctx->platform_devices;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  cl_uint         platform_devices_cnt  = backend_ctx->platform_devices_cnt;
+  cl_device_id   *platform_devices      = backend_ctx->platform_devices;
 
   bool need_adl     = false;
   bool need_nvml    = false;
@@ -3221,7 +3359,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_vendor[platform_id] = platform_vendor;
+    backend_ctx->platforms_vendor[platform_id] = platform_vendor;
 
     // platform name
 
@@ -3235,7 +3373,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_name[platform_id] = platform_name;
+    backend_ctx->platforms_name[platform_id] = platform_name;
 
     // platform version
 
@@ -3249,7 +3387,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (CL_rc == -1) return -1;
 
-    opencl_ctx->platforms_version[platform_id] = platform_version;
+    backend_ctx->platforms_version[platform_id] = platform_version;
 
     // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
     // this causes trouble with vendor id based macros
@@ -3298,7 +3436,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       platform_vendor_id = VENDOR_ID_GENERIC;
     }
 
-    bool platform_skipped = ((opencl_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
+    bool platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
 
     CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
 
@@ -3311,7 +3449,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       platform_skipped = true;
     }
 
-    opencl_ctx->platforms_skipped[platform_id] = platform_skipped;
+    backend_ctx->platforms_skipped[platform_id] = platform_skipped;
 
     if (platform_skipped == true) continue;
 
@@ -3331,7 +3469,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       }
     }
 
-    hc_device_param_t *devices_param = opencl_ctx->devices_param;
+    hc_device_param_t *devices_param = backend_ctx->devices_param;
 
     for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
     {
@@ -3710,12 +3848,12 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // skipped
 
-      if ((opencl_ctx->devices_filter & (1ULL << device_id)) == 0)
+      if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
       {
         device_param->skipped = true;
       }
 
-      if ((opencl_ctx->device_types_filter & (device_type)) == 0)
+      if ((backend_ctx->device_types_filter & (device_type)) == 0)
       {
         device_param->skipped = true;
       }
@@ -4091,7 +4229,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
           cl_int CL_err;
 
-          OCL_PTR *ocl = opencl_ctx->ocl;
+          OCL_PTR *ocl = backend_ctx->ocl;
 
           tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
 
@@ -4161,11 +4299,11 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
-  if (opencl_ctx->devices_filter != (u64) -1)
+  if (backend_ctx->devices_filter != (u64) -1)
   {
     const u64 devices_cnt_mask = ~(((u64) -1 >> devices_cnt) << devices_cnt);
 
-    if (opencl_ctx->devices_filter > devices_cnt_mask)
+    if (backend_ctx->devices_filter > devices_cnt_mask)
     {
       event_log_error (hashcat_ctx, "An invalid device was specified using the --opencl-devices parameter.");
       event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", devices_cnt);
@@ -4174,37 +4312,37 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     }
   }
 
-  opencl_ctx->target_msec     = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
+  backend_ctx->target_msec     = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
 
-  opencl_ctx->devices_cnt     = devices_cnt;
-  opencl_ctx->devices_active  = devices_active;
+  backend_ctx->devices_cnt     = devices_cnt;
+  backend_ctx->devices_active  = devices_active;
 
-  opencl_ctx->need_adl        = need_adl;
-  opencl_ctx->need_nvml       = need_nvml;
-  opencl_ctx->need_nvapi      = need_nvapi;
-  opencl_ctx->need_sysfs      = need_sysfs;
+  backend_ctx->need_adl        = need_adl;
+  backend_ctx->need_nvml       = need_nvml;
+  backend_ctx->need_nvapi      = need_nvapi;
+  backend_ctx->need_sysfs      = need_sysfs;
 
-  opencl_ctx->comptime        = comptime;
+  backend_ctx->comptime        = comptime;
 
   return 0;
 }
 
-void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 platform_id = 0; platform_id < opencl_ctx->platforms_cnt; platform_id++)
+  for (u32 platform_id = 0; platform_id < backend_ctx->platforms_cnt; platform_id++)
   {
-    hcfree (opencl_ctx->platforms_vendor[platform_id]);
-    hcfree (opencl_ctx->platforms_name[platform_id]);
-    hcfree (opencl_ctx->platforms_version[platform_id]);
+    hcfree (backend_ctx->platforms_vendor[platform_id]);
+    hcfree (backend_ctx->platforms_name[platform_id]);
+    hcfree (backend_ctx->platforms_version[platform_id]);
   }
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4215,13 +4353,13 @@ void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (device_param->device_vendor);
   }
 
-  opencl_ctx->devices_cnt    = 0;
-  opencl_ctx->devices_active = 0;
+  backend_ctx->devices_cnt    = 0;
+  backend_ctx->devices_active = 0;
 
-  opencl_ctx->need_adl    = false;
-  opencl_ctx->need_nvml   = false;
-  opencl_ctx->need_nvapi  = false;
-  opencl_ctx->need_sysfs  = false;
+  backend_ctx->need_adl    = false;
+  backend_ctx->need_nvml   = false;
+  backend_ctx->need_nvapi  = false;
+  backend_ctx->need_sysfs  = false;
 }
 
 static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
@@ -4247,23 +4385,23 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
   return true;
 }
 
-void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id_src = 0; device_id_src < opencl_ctx->devices_cnt; device_id_src++)
+  for (u32 device_id_src = 0; device_id_src < backend_ctx->devices_cnt; device_id_src++)
   {
-    hc_device_param_t *device_param_src = &opencl_ctx->devices_param[device_id_src];
+    hc_device_param_t *device_param_src = &backend_ctx->devices_param[device_id_src];
 
     if (device_param_src->skipped == true) continue;
 
     if (device_param_src->skipped_warning == true) continue;
 
-    for (u32 device_id_dst = device_id_src; device_id_dst < opencl_ctx->devices_cnt; device_id_dst++)
+    for (u32 device_id_dst = device_id_src; device_id_dst < backend_ctx->devices_cnt; device_id_dst++)
     {
-      hc_device_param_t *device_param_dst = &opencl_ctx->devices_param[device_id_dst];
+      hc_device_param_t *device_param_dst = &backend_ctx->devices_param[device_id_dst];
 
       if (device_param_dst->skipped == true) continue;
 
@@ -4286,20 +4424,20 @@ void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   status_ctx_t         *status_ctx          = hashcat_ctx->status_ctx;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
   user_options_t       *user_options        = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
   u32 kernel_power_all = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4308,7 +4446,7 @@ void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
     kernel_power_all += device_param->kernel_power;
   }
 
-  opencl_ctx->kernel_power_all = kernel_power_all;
+  backend_ctx->kernel_power_all = kernel_power_all;
 
   /*
    * Inform user about possible slow speeds
@@ -4330,22 +4468,22 @@ void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
+void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
 {
   combinator_ctx_t     *combinator_ctx      = hashcat_ctx->combinator_ctx;
   hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   hashes_t             *hashes              = hashcat_ctx->hashes;
   mask_ctx_t           *mask_ctx            = hashcat_ctx->mask_ctx;
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   straight_ctx_t       *straight_ctx        = hashcat_ctx->straight_ctx;
   user_options_t       *user_options        = hashcat_ctx->user_options;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4567,23 +4705,23 @@ static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param
   return kernel_threads;
 }
 
-int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
+int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 {
   const bitmap_ctx_t         *bitmap_ctx          = hashcat_ctx->bitmap_ctx;
   const folder_config_t      *folder_config       = hashcat_ctx->folder_config;
   const hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   const hashes_t             *hashes              = hashcat_ctx->hashes;
   const module_ctx_t         *module_ctx          = hashcat_ctx->module_ctx;
-        opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+        backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   const straight_ctx_t       *straight_ctx        = hashcat_ctx->straight_ctx;
   const user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
   const user_options_t       *user_options        = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   u32 hardware_power_all = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     int CL_rc = CL_SUCCESS;
 
@@ -4591,7 +4729,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
      * host buffer
      */
 
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -4996,7 +5134,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
     char *device_name_chksum_amp_mp = (char *) hcmalloc (HCBUFSIZ_TINY);
 
     const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s-%d-%u",
-      opencl_ctx->comptime,
+      backend_ctx->comptime,
       device_param->platform_vendor_id,
       device_param->device_name,
       device_param->device_version,
@@ -5005,7 +5143,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
       hashconfig->kern_type);
 
     const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s",
-      opencl_ctx->comptime,
+      backend_ctx->comptime,
       device_param->platform_vendor_id,
       device_param->device_name,
       device_param->device_version,
@@ -6700,7 +6838,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
       #endif
 
       // we assume all devices have the same specs here, which is wrong, it's a start
-      if ((size_total_host * opencl_ctx->devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
+      if ((size_total_host * backend_ctx->devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
 
       if (memory_limit_hit == 1)
       {
@@ -6864,20 +7002,20 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
     if (hardware_power_all == 0) return -1;
   }
 
-  opencl_ctx->hardware_power_all = hardware_power_all;
+  backend_ctx->hardware_power_all = hardware_power_all;
 
   return 0;
 }
 
-void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
+void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7035,15 +7173,15 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_session_reset (hashcat_ctx_t *hashcat_ctx)
+void backend_session_reset (hashcat_ctx_t *hashcat_ctx)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return;
+  if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7081,22 +7219,22 @@ void opencl_session_reset (hashcat_ctx_t *hashcat_ctx)
     #endif
   }
 
-  opencl_ctx->kernel_power_all   = 0;
-  opencl_ctx->kernel_power_final = 0;
+  backend_ctx->kernel_power_all   = 0;
+  backend_ctx->kernel_power_final = 0;
 }
 
-int opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
+int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
 {
   combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx;
   hashconfig_t     *hashconfig     = hashcat_ctx->hashconfig;
-  opencl_ctx_t     *opencl_ctx     = hashcat_ctx->opencl_ctx;
+  backend_ctx_t    *backend_ctx    = hashcat_ctx->backend_ctx;
   user_options_t   *user_options   = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7143,19 +7281,19 @@ int opencl_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-int opencl_session_update_mp (hashcat_ctx_t *hashcat_ctx)
+int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
 {
   mask_ctx_t     *mask_ctx     = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -7176,19 +7314,19 @@ int opencl_session_update_mp (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-int opencl_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r)
+int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r)
 {
   mask_ctx_t     *mask_ctx     = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (opencl_ctx->enabled == false) return 0;
+  if (backend_ctx->enabled == false) return 0;
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
diff --git a/src/dispatch.c b/src/dispatch.c
index e43a23ad0..466b16604 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -7,7 +7,7 @@
 #include "types.h"
 #include "event.h"
 #include "memory.h"
-#include "opencl.h"
+#include "backend.h"
 #include "wordlist.h"
 #include "shared.h"
 #include "thread.h"
@@ -23,13 +23,13 @@
 
 static u64 get_highest_words_done (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64 words_cur = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -45,13 +45,13 @@ static u64 get_highest_words_done (const hashcat_ctx_t *hashcat_ctx)
 
 static u64 get_lowest_words_done (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64 words_cur = 0xffffffffffffffff;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
@@ -76,20 +76,20 @@ static int set_kernel_power_final (hashcat_ctx_t *hashcat_ctx, const u64 kernel_
 {
   EVENT (EVENT_SET_KERNEL_POWER_FINAL);
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  opencl_ctx->kernel_power_final = kernel_power_final;
+  backend_ctx->kernel_power_final = kernel_power_final;
 
   return 0;
 }
 
-static u64 get_power (opencl_ctx_t *opencl_ctx, hc_device_param_t *device_param)
+static u64 get_power (backend_ctx_t *backend_ctx, hc_device_param_t *device_param)
 {
-  const u64 kernel_power_final = opencl_ctx->kernel_power_final;
+  const u64 kernel_power_final = backend_ctx->kernel_power_final;
 
   if (kernel_power_final)
   {
-    const double device_factor = (double) device_param->hardware_power / opencl_ctx->hardware_power_all;
+    const double device_factor = (double) device_param->hardware_power / backend_ctx->hardware_power_all;
 
     const u64 words_left_device = (u64) CEIL (kernel_power_final * device_factor);
 
@@ -109,7 +109,7 @@ static u64 get_power (opencl_ctx_t *opencl_ctx, hc_device_param_t *device_param)
 
 static u64 get_work (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 max)
 {
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   status_ctx_t   *status_ctx   = hashcat_ctx->status_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -120,19 +120,19 @@ static u64 get_work (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   device_param->words_off = words_off;
 
-  const u64 kernel_power_all = opencl_ctx->kernel_power_all;
+  const u64 kernel_power_all = backend_ctx->kernel_power_all;
 
   const u64 words_left = words_base - words_off;
 
   if (words_left < kernel_power_all)
   {
-    if (opencl_ctx->kernel_power_final == 0)
+    if (backend_ctx->kernel_power_final == 0)
     {
       set_kernel_power_final (hashcat_ctx, words_left);
     }
   }
 
-  const u64 kernel_power = get_power (opencl_ctx, device_param);
+  const u64 kernel_power = get_power (backend_ctx, device_param);
 
   u64 work = MIN (words_left, kernel_power);
 
@@ -339,11 +339,11 @@ HC_API_CALL void *thread_calc_stdin (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped) return NULL;
 
@@ -370,7 +370,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   mask_ctx_t           *mask_ctx           = hashcat_ctx->mask_ctx;
   straight_ctx_t       *straight_ctx       = hashcat_ctx->straight_ctx;
   combinator_ctx_t     *combinator_ctx     = hashcat_ctx->combinator_ctx;
-  opencl_ctx_t         *opencl_ctx         = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx        = hashcat_ctx->backend_ctx;
   status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
 
   const u32 attack_mode = user_options->attack_mode;
@@ -468,7 +468,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -801,7 +801,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -1082,7 +1082,7 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
         // this greatly reduces spam on hashcat console
 
-        const u64 pre_rejects_ignore = get_power (opencl_ctx, device_param) / 2;
+        const u64 pre_rejects_ignore = get_power (backend_ctx, device_param) / 2;
 
         while (pre_rejects > pre_rejects_ignore)
         {
@@ -1658,11 +1658,11 @@ HC_API_CALL void *thread_calc (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped) return NULL;
 
diff --git a/src/emu_inc_platform.c b/src/emu_inc_platform.c
new file mode 100644
index 000000000..f390abf0a
--- /dev/null
+++ b/src/emu_inc_platform.c
@@ -0,0 +1,11 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "bitops.h"
+#include "emu_general.h"
+
+#include "inc_platform.cl"
diff --git a/src/ext_cuda.c b/src/ext_cuda.c
new file mode 100644
index 000000000..dc43e1b61
--- /dev/null
+++ b/src/ext_cuda.c
@@ -0,0 +1,8 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "ext_cuda.h"
diff --git a/src/hashcat.c b/src/hashcat.c
index b45319520..68fe33d73 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -34,7 +34,7 @@
 #include "loopback.h"
 #include "monitor.h"
 #include "mpsp.h"
-#include "opencl.h"
+#include "backend.h"
 #include "outfile_check.h"
 #include "outfile.h"
 #include "pidfile.h"
@@ -59,7 +59,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
   hashes_t             *hashes              = hashcat_ctx->hashes;
   induct_ctx_t         *induct_ctx          = hashcat_ctx->induct_ctx;
   logfile_ctx_t        *logfile_ctx         = hashcat_ctx->logfile_ctx;
-  opencl_ctx_t         *opencl_ctx          = hashcat_ctx->opencl_ctx;
+  backend_ctx_t        *backend_ctx         = hashcat_ctx->backend_ctx;
   restore_ctx_t        *restore_ctx         = hashcat_ctx->restore_ctx;
   status_ctx_t         *status_ctx          = hashcat_ctx->status_ctx;
   user_options_extra_t *user_options_extra  = hashcat_ctx->user_options_extra;
@@ -109,7 +109,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     user_options->skip = 0;
   }
 
-  opencl_session_reset (hashcat_ctx);
+  backend_session_reset (hashcat_ctx);
 
   cpt_ctx_reset (hashcat_ctx);
 
@@ -174,15 +174,15 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
    * this is required for autotune
    */
 
-  opencl_ctx_devices_kernel_loops (hashcat_ctx);
+  backend_ctx_devices_kernel_loops (hashcat_ctx);
 
   /**
    * prepare thread buffers
    */
 
-  thread_param_t *threads_param = (thread_param_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (thread_param_t));
+  thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
 
-  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (hc_thread_t));
+  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
 
   /**
    * create autotune threads
@@ -192,7 +192,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->devices_status = STATUS_AUTOTUNE;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     thread_param_t *thread_param = threads_param + device_id;
 
@@ -202,7 +202,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     hc_thread_create (c_threads[device_id], thread_autotune, thread_param);
   }
 
-  hc_thread_wait (opencl_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
 
   EVENT (EVENT_AUTOTUNE_FINISHED);
 
@@ -210,13 +210,13 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
    * find same opencl devices and equal results
    */
 
-  opencl_ctx_devices_sync_tuning (hashcat_ctx);
+  backend_ctx_devices_sync_tuning (hashcat_ctx);
 
   /**
-   * autotune modified kernel_accel, which modifies opencl_ctx->kernel_power_all
+   * autotune modified kernel_accel, which modifies backend_ctx->kernel_power_all
    */
 
-  opencl_ctx_devices_update_power (hashcat_ctx);
+  backend_ctx_devices_update_power (hashcat_ctx);
 
   /**
    * Begin loopback recording
@@ -249,7 +249,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->accessible = true;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     thread_param_t *thread_param = threads_param + device_id;
 
@@ -266,7 +266,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  hc_thread_wait (opencl_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
 
   hcfree (c_threads);
 
@@ -438,7 +438,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
   hashconfig_t   *hashconfig    = hashcat_ctx->hashconfig;
   hashes_t       *hashes        = hashcat_ctx->hashes;
   mask_ctx_t     *mask_ctx      = hashcat_ctx->mask_ctx;
-  opencl_ctx_t   *opencl_ctx    = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx   = hashcat_ctx->backend_ctx;
   outcheck_ctx_t *outcheck_ctx  = hashcat_ctx->outcheck_ctx;
   restore_ctx_t  *restore_ctx   = hashcat_ctx->restore_ctx;
   status_ctx_t   *status_ctx    = hashcat_ctx->status_ctx;
@@ -722,7 +722,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
   EVENT (EVENT_OPENCL_SESSION_PRE);
 
-  const int rc_session_begin = opencl_session_begin (hashcat_ctx);
+  const int rc_session_begin = backend_session_begin (hashcat_ctx);
 
   if (rc_session_begin == -1) return -1;
 
@@ -736,13 +736,13 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
   {
     EVENT (EVENT_SELFTEST_STARTING);
 
-    thread_param_t *threads_param = (thread_param_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (thread_param_t));
+    thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
 
-    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (opencl_ctx->devices_cnt, sizeof (hc_thread_t));
+    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
 
     status_ctx->devices_status = STATUS_SELFTEST;
 
-    for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
     {
       thread_param_t *thread_param = threads_param + device_id;
 
@@ -752,7 +752,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
       hc_thread_create (selftest_threads[device_id], thread_selftest, thread_param);
     }
 
-    hc_thread_wait (opencl_ctx->devices_cnt, selftest_threads);
+    hc_thread_wait (backend_ctx->devices_cnt, selftest_threads);
 
     hcfree (threads_param);
 
@@ -760,11 +760,11 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
     // check for any selftest failures
 
-    for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
     {
-      if (opencl_ctx->enabled == false) continue;
+      if (backend_ctx->enabled == false) continue;
 
-      hc_device_param_t *device_param = opencl_ctx->devices_param + device_id;
+      hc_device_param_t *device_param = backend_ctx->devices_param + device_id;
 
       if (device_param->skipped == true) continue;
 
@@ -881,7 +881,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
   // finalize opencl session
 
-  opencl_session_destroy (hashcat_ctx);
+  backend_session_destroy (hashcat_ctx);
 
   // clean up
 
@@ -930,7 +930,7 @@ int hashcat_init (hashcat_ctx_t *hashcat_ctx, void (*event) (const u32, struct h
   hashcat_ctx->loopback_ctx       = (loopback_ctx_t *)        hcmalloc (sizeof (loopback_ctx_t));
   hashcat_ctx->mask_ctx           = (mask_ctx_t *)            hcmalloc (sizeof (mask_ctx_t));
   hashcat_ctx->module_ctx         = (module_ctx_t *)          hcmalloc (sizeof (module_ctx_t));
-  hashcat_ctx->opencl_ctx         = (opencl_ctx_t *)          hcmalloc (sizeof (opencl_ctx_t));
+  hashcat_ctx->backend_ctx        = (backend_ctx_t *)         hcmalloc (sizeof (backend_ctx_t));
   hashcat_ctx->outcheck_ctx       = (outcheck_ctx_t *)        hcmalloc (sizeof (outcheck_ctx_t));
   hashcat_ctx->outfile_ctx        = (outfile_ctx_t *)         hcmalloc (sizeof (outfile_ctx_t));
   hashcat_ctx->pidfile_ctx        = (pidfile_ctx_t *)         hcmalloc (sizeof (pidfile_ctx_t));
@@ -964,7 +964,7 @@ void hashcat_destroy (hashcat_ctx_t *hashcat_ctx)
   hcfree (hashcat_ctx->loopback_ctx);
   hcfree (hashcat_ctx->mask_ctx);
   hcfree (hashcat_ctx->module_ctx);
-  hcfree (hashcat_ctx->opencl_ctx);
+  hcfree (hashcat_ctx->backend_ctx);
   hcfree (hashcat_ctx->outcheck_ctx);
   hcfree (hashcat_ctx->outfile_ctx);
   hcfree (hashcat_ctx->pidfile_ctx);
@@ -1172,15 +1172,15 @@ int hashcat_session_init (hashcat_ctx_t *hashcat_ctx, const char *install_folder
    * Init OpenCL library loader
    */
 
-  const int rc_opencl_init = opencl_ctx_init (hashcat_ctx);
+  const int rc_backend_init = backend_ctx_init (hashcat_ctx);
 
-  if (rc_opencl_init == -1) return -1;
+  if (rc_backend_init == -1) return -1;
 
   /**
    * Init OpenCL devices
    */
 
-  const int rc_devices_init = opencl_ctx_devices_init (hashcat_ctx, comptime);
+  const int rc_devices_init = backend_ctx_devices_init (hashcat_ctx, comptime);
 
   if (rc_devices_init == -1) return -1;
 
@@ -1341,25 +1341,25 @@ int hashcat_session_destroy (hashcat_ctx_t *hashcat_ctx)
   #endif
   #endif
 
-  debugfile_destroy          (hashcat_ctx);
-  dictstat_destroy           (hashcat_ctx);
-  folder_config_destroy      (hashcat_ctx);
-  hwmon_ctx_destroy          (hashcat_ctx);
-  induct_ctx_destroy         (hashcat_ctx);
-  logfile_destroy            (hashcat_ctx);
-  loopback_destroy           (hashcat_ctx);
-  opencl_ctx_devices_destroy (hashcat_ctx);
-  opencl_ctx_destroy         (hashcat_ctx);
-  outcheck_ctx_destroy       (hashcat_ctx);
-  outfile_destroy            (hashcat_ctx);
-  pidfile_ctx_destroy        (hashcat_ctx);
-  potfile_destroy            (hashcat_ctx);
-  restore_ctx_destroy        (hashcat_ctx);
-  tuning_db_destroy          (hashcat_ctx);
-  user_options_destroy       (hashcat_ctx);
-  user_options_extra_destroy (hashcat_ctx);
-  status_ctx_destroy         (hashcat_ctx);
-  event_ctx_destroy          (hashcat_ctx);
+  debugfile_destroy           (hashcat_ctx);
+  dictstat_destroy            (hashcat_ctx);
+  folder_config_destroy       (hashcat_ctx);
+  hwmon_ctx_destroy           (hashcat_ctx);
+  induct_ctx_destroy          (hashcat_ctx);
+  logfile_destroy             (hashcat_ctx);
+  loopback_destroy            (hashcat_ctx);
+  backend_ctx_devices_destroy (hashcat_ctx);
+  backend_ctx_destroy         (hashcat_ctx);
+  outcheck_ctx_destroy        (hashcat_ctx);
+  outfile_destroy             (hashcat_ctx);
+  pidfile_ctx_destroy         (hashcat_ctx);
+  potfile_destroy             (hashcat_ctx);
+  restore_ctx_destroy         (hashcat_ctx);
+  tuning_db_destroy           (hashcat_ctx);
+  user_options_destroy        (hashcat_ctx);
+  user_options_extra_destroy  (hashcat_ctx);
+  status_ctx_destroy          (hashcat_ctx);
+  event_ctx_destroy           (hashcat_ctx);
 
   return 0;
 }
diff --git a/src/hashes.c b/src/hashes.c
index 3b1aeb50a..653123d9d 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -14,7 +14,7 @@
 #include "terminal.h"
 #include "logfile.h"
 #include "loopback.h"
-#include "opencl.h"
+#include "backend.h"
 #include "outfile.h"
 #include "potfile.h"
 #include "rp.h"
diff --git a/src/hwmon.c b/src/hwmon.c
index c8a568c49..d38d4f052 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -47,9 +47,9 @@ static void sysfs_close (hashcat_ctx_t *hashcat_ctx)
 
 static char *hm_SYSFS_get_syspath_device (hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *syspath;
 
@@ -1344,16 +1344,16 @@ static int hm_get_adapter_index_nvml (hashcat_ctx_t *hashcat_ctx, HM_ADAPTER_NVM
 
 int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1380,7 +1380,7 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1404,16 +1404,16 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
 int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1428,7 +1428,7 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1452,16 +1452,16 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
 int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].temperature_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1511,7 +1511,7 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1535,16 +1535,16 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].fanpolicy_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1580,7 +1580,7 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     return 1;
   }
@@ -1593,16 +1593,16 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
 int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].fanspeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1658,7 +1658,7 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1682,16 +1682,16 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
 int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].buslanes_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1724,7 +1724,7 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1748,16 +1748,16 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
 int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].utilization_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1776,7 +1776,7 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1800,16 +1800,16 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].memoryspeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1842,7 +1842,7 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1866,16 +1866,16 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
 int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].corespeed_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1908,7 +1908,7 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1932,20 +1932,20 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
 int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
 {
-  hwmon_ctx_t  *hwmon_ctx  = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
   if (hwmon_ctx->hm_device[device_id].throttle_get_supported == false) return -1;
 
-  if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
   }
 
-  if (opencl_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1961,7 +1961,7 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
       clocksThrottleReasons &= ~nvmlClocksThrottleReasonApplicationsClocksSetting;
       clocksThrottleReasons &= ~nvmlClocksThrottleReasonUnknown;
 
-      if (opencl_ctx->kernel_power_final)
+      if (backend_ctx->kernel_power_final)
       {
         clocksThrottleReasons &= ~nvmlClocksThrottleReasonHwSlowdown;
       }
@@ -1999,7 +1999,7 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 {
   hwmon_ctx_t    *hwmon_ctx    = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
   hwmon_ctx->enabled = false;
@@ -2037,7 +2037,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     hcfree (hm_adapters_sysfs); \
   }
 
-  if (opencl_ctx->need_nvml == true)
+  if (backend_ctx->need_nvml == true)
   {
     hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
 
@@ -2049,7 +2049,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if ((opencl_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
+  if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
   {
     hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
 
@@ -2061,7 +2061,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (opencl_ctx->need_adl == true)
+  if (backend_ctx->need_adl == true)
   {
     hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
 
@@ -2073,7 +2073,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (opencl_ctx->need_sysfs == true)
+  if (backend_ctx->need_sysfs == true)
   {
     hwmon_ctx->hm_sysfs = (SYSFS_PTR *) hcmalloc (sizeof (SYSFS_PTR));
 
@@ -2102,9 +2102,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvml (hashcat_ctx, nvmlGPUHandle);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2152,9 +2152,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvapi (hashcat_ctx, nvGPUHandle);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2221,9 +2221,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
         return -1;
       }
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -2271,9 +2271,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     {
       int hm_adapters_id = 0;
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
@@ -2308,15 +2308,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * save buffer required for later restores
    */
 
-  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (opencl_ctx->devices_cnt, sizeof (ADLOD6MemClockState));
+  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->devices_cnt, sizeof (ADLOD6MemClockState));
 
   /**
    * HM devices: copy
    */
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
-    hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
     if (device_param->skipped == true) continue;
 
diff --git a/src/interface.c b/src/interface.c
index e8d396cd4..6218da192 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -8,7 +8,7 @@
 #include "memory.h"
 #include "event.h"
 #include "shared.h"
-#include "opencl.h"
+#include "backend.h"
 #include "modules.h"
 #include "dynloader.h"
 #include "interface.h"
diff --git a/src/main.c b/src/main.c
index 28ea88165..9cce0b835 100644
--- a/src/main.c
+++ b/src/main.c
@@ -549,7 +549,7 @@ static void main_outerloop_mainscreen (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   event_log_info (hashcat_ctx, NULL);
 }
 
-static void main_opencl_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -558,7 +558,7 @@ static void main_opencl_session_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
   event_log_info_nn (hashcat_ctx, "Initializing device kernels and memory...");
 }
 
-static void main_opencl_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -1022,8 +1022,8 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
     case EVENT_MONITOR_PERFORMANCE_HINT:  main_monitor_performance_hint  (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_HINT:      main_monitor_noinput_hint      (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_ABORT:     main_monitor_noinput_abort     (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_POST:       main_opencl_session_post       (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_PRE:        main_opencl_session_pre        (hashcat_ctx, buf, len); break;
+    case EVENT_OPENCL_SESSION_POST:       main_backend_session_post      (hashcat_ctx, buf, len); break;
+    case EVENT_OPENCL_SESSION_PRE:        main_backend_session_pre       (hashcat_ctx, buf, len); break;
     case EVENT_OPENCL_DEVICE_INIT_POST:   main_opencl_device_init_post   (hashcat_ctx, buf, len); break;
     case EVENT_OPENCL_DEVICE_INIT_PRE:    main_opencl_device_init_pre    (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_FINISHED:        main_outerloop_finished        (hashcat_ctx, buf, len); break;
diff --git a/src/monitor.c b/src/monitor.c
index 2b942890f..6c5d4577e 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -44,7 +44,7 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 {
   hashes_t       *hashes        = hashcat_ctx->hashes;
   hwmon_ctx_t    *hwmon_ctx     = hashcat_ctx->hwmon_ctx;
-  opencl_ctx_t   *opencl_ctx    = hashcat_ctx->opencl_ctx;
+  backend_ctx_t  *backend_ctx   = hashcat_ctx->backend_ctx;
   restore_ctx_t  *restore_ctx   = hashcat_ctx->restore_ctx;
   status_ctx_t   *status_ctx    = hashcat_ctx->status_ctx;
   user_options_t *user_options  = hashcat_ctx->user_options;
@@ -114,13 +114,13 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
     {
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
-        if ((opencl_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         const int temperature = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
 
@@ -132,9 +132,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
         }
       }
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
@@ -232,9 +232,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
       {
-        hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
         if (device_param->skipped == true) continue;
 
diff --git a/src/mpsp.c b/src/mpsp.c
index a5072776e..c81f6c2e5 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -11,7 +11,7 @@
 #include "logfile.h"
 #include "convert.h"
 #include "filehandling.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "ext_lzma.h"
 #include "mpsp.h"
@@ -1224,7 +1224,7 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
           return -1;
         }
 
-        const int rc_update_mp = opencl_session_update_mp (hashcat_ctx);
+        const int rc_update_mp = backend_session_update_mp (hashcat_ctx);
 
         if (rc_update_mp == -1) return -1;
       }
@@ -1257,13 +1257,13 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
           return -1;
         }
 
-        const int rc_update_mp = opencl_session_update_mp (hashcat_ctx);
+        const int rc_update_mp = backend_session_update_mp (hashcat_ctx);
 
         if (rc_update_mp == -1) return -1;
       }
     }
 
-    const int rc_update_combinator = opencl_session_update_combinator (hashcat_ctx);
+    const int rc_update_combinator = backend_session_update_combinator (hashcat_ctx);
 
     if (rc_update_combinator == -1) return -1;
   }
@@ -1378,7 +1378,7 @@ int mask_ctx_update_loop (hashcat_ctx_t *hashcat_ctx)
         return -1;
       }
 
-      const int rc_update_mp_rl = opencl_session_update_mp_rl (hashcat_ctx, css_cnt_lr[0], css_cnt_lr[1]);
+      const int rc_update_mp_rl = backend_session_update_mp_rl (hashcat_ctx, css_cnt_lr[0], css_cnt_lr[1]);
 
       if (rc_update_mp_rl == -1) return -1;
     }
diff --git a/src/outfile.c b/src/outfile.c
index a7abbce00..06e5c80ed 100644
--- a/src/outfile.c
+++ b/src/outfile.c
@@ -11,7 +11,7 @@
 #include "rp.h"
 #include "emu_inc_rp.h"
 #include "emu_inc_rp_optimized.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "locking.h"
 #include "outfile.h"
diff --git a/src/selftest.c b/src/selftest.c
index 89a527298..9f97aff2c 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -8,7 +8,7 @@
 #include "event.h"
 #include "bitops.h"
 #include "convert.h"
-#include "opencl.h"
+#include "backend.h"
 #include "thread.h"
 #include "selftest.h"
 
@@ -572,15 +572,15 @@ HC_API_CALL void *thread_selftest (void *p)
 
   hashcat_ctx_t *hashcat_ctx = thread_param->hashcat_ctx;
 
-  opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  if (opencl_ctx->enabled == false) return NULL;
+  if (backend_ctx->enabled == false) return NULL;
 
   user_options_t *user_options = hashcat_ctx->user_options;
 
   if (user_options->self_test_disable == true) return NULL;
 
-  hc_device_param_t *device_param = opencl_ctx->devices_param + thread_param->tid;
+  hc_device_param_t *device_param = backend_ctx->devices_param + thread_param->tid;
 
   if (device_param->skipped == true) return NULL;
 
diff --git a/src/status.c b/src/status.c
index ed4cb9281..3086c6066 100644
--- a/src/status.c
+++ b/src/status.c
@@ -200,32 +200,32 @@ double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_en
 
 int status_get_device_info_cnt (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return opencl_ctx->devices_cnt;
+  return backend_ctx->devices_cnt;
 }
 
 int status_get_device_info_active (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return opencl_ctx->devices_active;
+  return backend_ctx->devices_active;
 }
 
 bool status_get_skipped_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   return device_param->skipped;
 }
 
 bool status_get_skipped_warning_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   return device_param->skipped_warning;
 }
@@ -836,13 +836,13 @@ int status_get_guess_mask_length (const hashcat_ctx_t *hashcat_ctx)
 char *status_get_guess_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
   const hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
-  const opencl_ctx_t         *opencl_ctx         = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t        *backend_ctx        = hashcat_ctx->backend_ctx;
   const status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
   const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
 
   if (status_ctx->accessible == false) return NULL;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *display = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1410,11 +1410,11 @@ u64 status_get_progress_end_relative_skip (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_hashes_msec_all (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   double hashes_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     hashes_all_msec += status_get_hashes_msec_dev (hashcat_ctx, device_id);
   }
@@ -1424,12 +1424,12 @@ double status_get_hashes_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_hashes_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1459,12 +1459,12 @@ double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, c
 {
   // this function increases accuracy for benchmark modes
 
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1486,11 +1486,11 @@ double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, c
 
 double status_get_exec_msec_all (const hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   double exec_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < opencl_ctx->devices_cnt; device_id++)
+  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
   {
     exec_all_msec += status_get_exec_msec_dev (hashcat_ctx, device_id);
   }
@@ -1500,9 +1500,9 @@ double status_get_exec_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
 double status_get_exec_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   double exec_dev_msec = 0;
 
@@ -1700,9 +1700,9 @@ char *status_get_cpt (const hashcat_ctx_t *hashcat_ctx)
 
 int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int salt_pos = 0;
 
@@ -1716,9 +1716,9 @@ int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
 
 int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int innerloop_pos = 0;
 
@@ -1732,9 +1732,9 @@ int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
 
 int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int innerloop_left = 0;
 
@@ -1748,9 +1748,9 @@ int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 
 int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int iteration_pos = 0;
 
@@ -1764,9 +1764,9 @@ int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
 
 int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int iteration_left = 0;
 
@@ -1781,9 +1781,9 @@ int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 #ifdef WITH_BRAIN
 int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int brain_client_id = -1;
 
@@ -1797,9 +1797,9 @@ int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const
 
 int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   int brain_link_status_dev = 0;
 
@@ -1815,9 +1815,9 @@ int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const in
 
 char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1835,9 +1835,9 @@ char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
 
 char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1855,9 +1855,9 @@ char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
 
 char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1882,9 +1882,9 @@ char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
 
 char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1910,9 +1910,9 @@ char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
 
 char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   char *output_buf = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1983,9 +1983,9 @@ char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_i
 
 int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return -1;
 
@@ -2004,9 +2004,9 @@ int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device
 
 int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return -1;
 
@@ -2025,9 +2025,9 @@ int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int devi
 
 u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2038,9 +2038,9 @@ u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
 
 double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2051,9 +2051,9 @@ double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int
 
 int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2066,9 +2066,9 @@ int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
 
 int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2081,9 +2081,9 @@ int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
 
 int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
@@ -2094,9 +2094,9 @@ int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 
 int status_get_vector_width_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
   if (device_param->skipped == true) return 0;
 
diff --git a/src/stdout.c b/src/stdout.c
index dd31d3034..d3fbcb0c7 100644
--- a/src/stdout.c
+++ b/src/stdout.c
@@ -10,7 +10,7 @@
 #include "emu_inc_rp.h"
 #include "emu_inc_rp_optimized.h"
 #include "mpsp.h"
-#include "opencl.h"
+#include "backend.h"
 #include "shared.h"
 #include "stdout.h"
 
diff --git a/src/terminal.c b/src/terminal.c
index 94b5b223a..52cf341e3 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -656,17 +656,17 @@ void example_hashes (hashcat_ctx_t *hashcat_ctx)
 
 void opencl_info (hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   event_log_info (hashcat_ctx, "OpenCL Info:");
   event_log_info (hashcat_ctx, NULL);
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  char          **platforms_vendor      = opencl_ctx->platforms_vendor;
-  char          **platforms_name        = opencl_ctx->platforms_name;
-  char          **platforms_version     = opencl_ctx->platforms_version;
-  cl_uint         devices_cnt           = opencl_ctx->devices_cnt;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  char          **platforms_vendor      = backend_ctx->platforms_vendor;
+  char          **platforms_name        = backend_ctx->platforms_name;
+  char          **platforms_version     = backend_ctx->platforms_version;
+  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
 
   for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
   {
@@ -683,7 +683,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
     for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
     {
-      const hc_device_param_t *device_param = opencl_ctx->devices_param + devices_idx;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
 
       if (device_param->platform != platform_id) continue;
 
@@ -717,18 +717,18 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
 void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
 {
-  const opencl_ctx_t   *opencl_ctx   = hashcat_ctx->opencl_ctx;
+  const backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   const user_options_t *user_options = hashcat_ctx->user_options;
 
   if (user_options->quiet            == true) return;
   if (user_options->machine_readable == true) return;
   if (user_options->status_json      == true) return;
 
-  cl_uint         platforms_cnt         = opencl_ctx->platforms_cnt;
-  cl_platform_id *platforms             = opencl_ctx->platforms;
-  char          **platforms_vendor      = opencl_ctx->platforms_vendor;
-  bool           *platforms_skipped     = opencl_ctx->platforms_skipped;
-  cl_uint         devices_cnt           = opencl_ctx->devices_cnt;
+  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+  cl_platform_id *platforms             = backend_ctx->platforms;
+  char          **platforms_vendor      = backend_ctx->platforms_vendor;
+  bool           *platforms_skipped     = backend_ctx->platforms_skipped;
+  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
 
   for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
   {
@@ -755,7 +755,7 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
 
     for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
     {
-      const hc_device_param_t *device_param = opencl_ctx->devices_param + devices_idx;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
 
       if (device_param->platform != platform_id) continue;
 
diff --git a/src/user_options.c b/src/user_options.c
index 755e636db..6b6984ff6 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -11,7 +11,7 @@
 #include "interface.h"
 #include "shared.h"
 #include "usage.h"
-#include "opencl.h"
+#include "backend.h"
 #include "user_options.h"
 
 #ifdef WITH_BRAIN

From 4045e600215036957c4f657ef528bb6da6ca53ef Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 10:03:16 +0200
Subject: [PATCH 02/73] Add nvrtc wrapper for later use

---
 OpenCL/inc_platform.h |  6 +--
 docs/changes.txt      |  1 +
 docs/readme.txt       |  2 +-
 include/ext_nvrtc.h   | 81 +++++++++++++++++++++++++++++++++++
 include/types.h       |  2 +
 src/Makefile          |  2 +-
 src/backend.c         | 99 ++++++++++++++++++++++++++++++++++++++++++-
 src/ext_nvrtc.c       |  8 ++++
 8 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 include/ext_nvrtc.h
 create mode 100644 src/ext_nvrtc.c

diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index fd3d310d1..7d27852d9 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -4,6 +4,7 @@
  */
 
 #ifndef _INC_PLATFORM_H
+#define _INC_PLATFORM_H
 
 #ifdef IS_CUDA
 DECLSPEC u32    atomic_dec      (u32 *p);
@@ -11,11 +12,6 @@ DECLSPEC u32    atomic_inc      (u32 *p);
 DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
-DECLSPEC uint4  uint4_init      (const u32 a);
-DECLSPEC uint4  uint4_init      (const u32 a, const u32 b, const u32 c, const u32 d);
-DECLSPEC __inline__ u8    rotate (const u8  v, const int i);
-DECLSPEC __inline__ u32   rotate (const u32 v, const int i);
-DECLSPEC __inline__ u64   rotate (const u64 v, const int i);
 
 #define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
 #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
diff --git a/docs/changes.txt b/docs/changes.txt
index 6f6b68b91..d18141fc4 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -73,6 +73,7 @@
 - Kernel Compile: Removed -cl-std= from all kernel build options since we're compatible to all OpenCL versions
 - Mode 16800/16801 hash format: Changed separator character from '*' to ':'
 - Requirements: Update runtime check for minimum NVIDIA driver version from 367.x to 418.56 or later
+- Requirements: Add new requirement for NVIDIA GPU: CUDA Toolkit (10.1 or later)
 
 * changes v5.0.0 -> v5.1.0
 
diff --git a/docs/readme.txt b/docs/readme.txt
index 2614bdf0b..ae10eb231 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -6,7 +6,7 @@ AMD GPUs on Windows require "AMD Radeon Software Crimson Edition" (15.12 or late
 Intel CPUs require "OpenCL Runtime for Intel Core and Intel Xeon Processors" (16.1.1 or later)
 Intel GPUs on Linux require "OpenCL 2.0 GPU Driver Package for Linux" (2.0 or later)
 Intel GPUs on Windows require "OpenCL Driver for Intel Iris and Intel HD Graphics"
-NVIDIA GPUs require "NVIDIA Driver" (418.56 or later)
+NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 or later)
 
 ##
 ## Features
diff --git a/include/ext_nvrtc.h b/include/ext_nvrtc.h
new file mode 100644
index 000000000..407170c16
--- /dev/null
+++ b/include/ext_nvrtc.h
@@ -0,0 +1,81 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EXT_NVRTC_H
+#define _EXT_NVRTC_H
+
+/**
+ * from nvrtc.h (/usr/local/cuda-10.1/targets/x86_64-linux/include/nvrtc.h)
+ */
+
+/**
+ * \ingroup error
+ * \brief   The enumerated type nvrtcResult defines API call result codes.
+ *          NVRTC API functions return nvrtcResult to indicate the call
+ *          result.
+ */
+typedef enum {
+  NVRTC_SUCCESS = 0,
+  NVRTC_ERROR_OUT_OF_MEMORY = 1,
+  NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
+  NVRTC_ERROR_INVALID_INPUT = 3,
+  NVRTC_ERROR_INVALID_PROGRAM = 4,
+  NVRTC_ERROR_INVALID_OPTION = 5,
+  NVRTC_ERROR_COMPILATION = 6,
+  NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+  NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
+  NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
+  NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
+  NVRTC_ERROR_INTERNAL_ERROR = 11
+} nvrtcResult;
+
+/**
+ * \ingroup compilation
+ * \brief   nvrtcProgram is the unit of compilation, and an opaque handle for
+ *          a program.
+ *
+ * To compile a CUDA program string, an instance of nvrtcProgram must be
+ * created first with ::nvrtcCreateProgram, then compiled with
+ * ::nvrtcCompileProgram.
+ */
+typedef struct _nvrtcProgram *nvrtcProgram;
+
+#ifdef _WIN32
+#define NVRTCAPI __stdcall
+#else
+#define NVRTCAPI
+#endif
+
+#define NVRTC_API_CALL NVRTCAPI
+
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCADDNAMEEXPRESSION)  (nvrtcProgram, const char *);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCCOMPILEPROGRAM)     (nvrtcProgram, int, const char **);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCCREATEPROGRAM)      (nvrtcProgram *, const char *, const char *, int, const char **, const char **);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCDESTROYPROGRAM)     (nvrtcProgram *);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETLOWEREDNAME)     (nvrtcProgram, const char *, const char **);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPTX)             (nvrtcProgram, char *);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPTXSIZE)         (nvrtcProgram, size_t *);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOG)      (nvrtcProgram, char *);
+typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOGSIZE)  (nvrtcProgram, size_t *);
+
+typedef struct hc_nvrtc_lib
+{
+  hc_dynlib_t lib;
+
+  NVRTC_NVRTCADDNAMEEXPRESSION  nvrtcAddNameExpression;
+  NVRTC_NVRTCCOMPILEPROGRAM     nvrtcCompileProgram;
+  NVRTC_NVRTCCREATEPROGRAM      nvrtcCreateProgram;
+  NVRTC_NVRTCDESTROYPROGRAM     nvrtcDestroyProgram;
+  NVRTC_NVRTCGETLOWEREDNAME     nvrtcGetLoweredName;
+  NVRTC_NVRTCGETPTX             nvrtcGetPTX;
+  NVRTC_NVRTCGETPTXSIZE         nvrtcGetPTXSize;
+  NVRTC_NVRTCGETPROGRAMLOG      nvrtcGetProgramLog;
+  NVRTC_NVRTCGETPROGRAMLOGSIZE  nvrtcGetProgramLogSize;
+
+} hc_nvrtc_lib_t;
+
+typedef hc_nvrtc_lib_t NVRTC_PTR;
+
+#endif // _EXT_NVRTC_H
diff --git a/include/types.h b/include/types.h
index f35eca3f7..0f0ecf1b4 100644
--- a/include/types.h
+++ b/include/types.h
@@ -989,6 +989,7 @@ typedef struct link_speed
 
 } link_speed_t;
 
+#include "ext_nvrtc.h"
 #include "ext_cuda.h"
 #include "ext_OpenCL.h"
 
@@ -1335,6 +1336,7 @@ typedef struct backend_ctx
 
   void               *ocl;
   void               *cuda;
+  void               *nvrtc;
 
   cl_uint             platforms_cnt;
   cl_platform_id     *platforms;
diff --git a/src/Makefile b/src/Makefile
index 196ad6c89..3f90fafe0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -280,7 +280,7 @@ EMU_OBJS_ALL            += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu
 EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512
 EMU_OBJS_ALL            += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
 
-OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
+OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL                += brain
diff --git a/src/backend.c b/src/backend.c
index 41cf2b645..e7187c94d 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -543,6 +543,73 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_d
   snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", profile_dir, attack_kern, device_name_chksum_amp_mp);
 }
 
+int nvrtc_init (hashcat_ctx_t *hashcat_ctx) // open the NVRTC library and fill backend_ctx->nvrtc; returns -1 if the library cannot be opened
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  NVRTC_PTR *nvrtc = backend_ctx->nvrtc; // dereferenced unconditionally below, so the caller must have set it
+
+  memset (nvrtc, 0, sizeof (NVRTC_PTR)); // clear the library handle and all function pointers
+
+  #if   defined (_WIN)
+  nvrtc->lib = hc_dlopen ("nvrtc");
+  #elif defined (__APPLE__)
+  nvrtc->lib = hc_dlopen ("/System/Library/Frameworks/NVRTC.framework/NVRTC");
+  #elif defined (__CYGWIN__)
+  nvrtc->lib = hc_dlopen ("nvrtc.dll");
+
+  if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("cygnvrtc-1.dll"); // second name, tried only when the first one failed
+  #else
+  nvrtc->lib = hc_dlopen ("libnvrtc.so");
+
+  if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("libnvrtc.so.1"); // second name, tried only when the first one failed
+  #endif
+
+  if (nvrtc->lib == NULL) // none of the names for this platform could be opened
+  {
+    event_log_error (hashcat_ctx, "Cannot find NVRTC library.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA SDK and/or driver for your platform.");
+    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
+    event_log_warning (hashcat_ctx, NULL);
+
+    return -1;
+  }
+  // NOTE(review): HC_LOAD_FUNC is defined elsewhere; presumably it looks each symbol up by name and stores it in nvrtc-> - confirm its failure behavior
+  HC_LOAD_FUNC (nvrtc, nvrtcAddNameExpression,  NVRTC_NVRTCADDNAMEEXPRESSION, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcCompileProgram,     NVRTC_NVRTCCOMPILEPROGRAM,    NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcCreateProgram,      NVRTC_NVRTCCREATEPROGRAM,     NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcDestroyProgram,     NVRTC_NVRTCDESTROYPROGRAM,    NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetLoweredName,     NVRTC_NVRTCGETLOWEREDNAME,    NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetPTX,             NVRTC_NVRTCGETPTX,            NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetPTXSize,         NVRTC_NVRTCGETPTXSIZE,        NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLog,      NVRTC_NVRTCGETPROGRAMLOG,     NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLogSize,  NVRTC_NVRTCGETPROGRAMLOGSIZE, NVRTC, 1);
+
+  return 0;
+}
+
+void nvrtc_close (hashcat_ctx_t *hashcat_ctx)
+{
+  // Tears down the NVRTC binding: closes the library handle if one was
+  // opened, frees the NVRTC_PTR table and clears the context pointer, so a
+  // repeated call finds nothing left to do.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  NVRTC_PTR *nvrtc_lib = backend_ctx->nvrtc;
+
+  if (nvrtc_lib == NULL) return;
+
+  if (nvrtc_lib->lib != NULL) hc_dlclose (nvrtc_lib->lib);
+
+  hcfree (nvrtc_lib);
+
+  backend_ctx->nvrtc = NULL;
+}
+
 int cuda_init (hashcat_ctx_t *hashcat_ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -572,6 +639,7 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
     event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
     event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
     event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
 
     return -1;
@@ -702,6 +770,7 @@ int ocl_init (hashcat_ctx_t *hashcat_ctx)
 
     event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
     event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
 
     return -1;
@@ -3071,6 +3140,31 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     cuda_close (hashcat_ctx);
   }
 
+  /**
+   * Load and map NVRTC library calls
+   */
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) hcmalloc (sizeof (NVRTC_PTR));
+
+  backend_ctx->nvrtc = nvrtc;
+
+  const int rc_nvrtc_init = nvrtc_init (hashcat_ctx);
+
+  if (rc_nvrtc_init == -1)
+  {
+    nvrtc_close (hashcat_ctx);
+  }
+
+  /**
+   * Check if both CUDA and NVRTC were loaded successfully
+   */
+
+  if ((rc_cuda_init == -1) || (rc_nvrtc_init == -1))
+  {
+    cuda_close  (hashcat_ctx);
+    nvrtc_close (hashcat_ctx);
+  }
+
   /**
    * Load and map OpenCL library calls
    */
@@ -3086,6 +3180,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     ocl_close (hashcat_ctx);
   }
 
+
   /**
    * return if both CUDA and OpenCL initialization failed
    */
@@ -3199,6 +3294,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
     event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
 
     FREE_OPENCL_CTX_ON_ERROR;
@@ -3300,7 +3396,8 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  ocl_close (hashcat_ctx);
+  cuda_close (hashcat_ctx);
+  ocl_close  (hashcat_ctx);
 
   hcfree (backend_ctx->devices_param);
 
diff --git a/src/ext_nvrtc.c b/src/ext_nvrtc.c
new file mode 100644
index 000000000..17e6ff03b
--- /dev/null
+++ b/src/ext_nvrtc.c
@@ -0,0 +1,8 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "ext_nvrtc.h"

From 9faba41848e269d7eb69f9280e7ca26662d78496 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 13:28:44 +0200
Subject: [PATCH 03/73] Use nvrtc to compile PTX (resulting PTX not yet used)

---
 OpenCL/inc_common.cl   |  24 ++--
 OpenCL/inc_platform.cl |   8 ++
 OpenCL/inc_platform.h  |   1 +
 OpenCL/inc_types.h     |  60 ++++----
 OpenCL/inc_vendor.h    |   4 -
 include/backend.h      |  19 ++-
 include/ext_nvrtc.h    |  22 +--
 src/backend.c          | 301 +++++++++++++++++++++++++++++++++++------
 src/convert.c          |  24 ++--
 src/ext_nvrtc.c        |  19 +++
 10 files changed, 371 insertions(+), 111 deletions(-)

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index a87649b3e..844e4ba12 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -18,7 +18,7 @@ DECLSPEC u8 v8a_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8a;
+  return v.v8.a;
 }
 
 DECLSPEC u8 v8b_from_v32_S (const u32 v32)
@@ -27,7 +27,7 @@ DECLSPEC u8 v8b_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8b;
+  return v.v8.b;
 }
 
 DECLSPEC u8 v8c_from_v32_S (const u32 v32)
@@ -36,7 +36,7 @@ DECLSPEC u8 v8c_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8c;
+  return v.v8.c;
 }
 
 DECLSPEC u8 v8d_from_v32_S (const u32 v32)
@@ -45,7 +45,7 @@ DECLSPEC u8 v8d_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8d;
+  return v.v8.d;
 }
 
 DECLSPEC u16 v16a_from_v32_S (const u32 v32)
@@ -54,7 +54,7 @@ DECLSPEC u16 v16a_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v16a;
+  return v.v16.a;
 }
 
 DECLSPEC u16 v16b_from_v32_S (const u32 v32)
@@ -63,15 +63,15 @@ DECLSPEC u16 v16b_from_v32_S (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v16b;
+  return v.v16.b;
 }
 
 DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b)
 {
   vconv32_t v;
 
-  v.v16a = v16a;
-  v.v16b = v16b;
+  v.v16.a = v16a;
+  v.v16.b = v16b;
 
   return v.v32;
 }
@@ -82,7 +82,7 @@ DECLSPEC u32 v32a_from_v64_S (const u64 v64)
 
   v.v64 = v64;
 
-  return v.v32a;
+  return v.v32.a;
 }
 
 DECLSPEC u32 v32b_from_v64_S (const u64 v64)
@@ -91,15 +91,15 @@ DECLSPEC u32 v32b_from_v64_S (const u64 v64)
 
   v.v64 = v64;
 
-  return v.v32b;
+  return v.v32.b;
 }
 
 DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b)
 {
   vconv64_t v;
 
-  v.v32a = v32a;
-  v.v32b = v32b;
+  v.v32.a = v32a;
+  v.v32.b = v32b;
 
   return v.v64;
 }
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 5c6d9780e..ceb12a4f1 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -8,6 +8,7 @@
 #include "inc_platform.h"
 
 #ifdef IS_NATIVE
+#define SYNC_THREADS()
 #endif
 
 #ifdef IS_CUDA
@@ -22,6 +23,11 @@ DECLSPEC u32 atomic_inc (u32 *p)
   return atomicAdd (p, 1);
 }
 
+DECLSPEC u32 atomic_or (u32 *p, u32 val)
+{
+  return atomicOr (p, val);
+}
+
 DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
 {
   return blockDim.x * blockIdx.x + threadIdx.x;
@@ -38,7 +44,9 @@ DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
   return blockDim.x;
 }
 
+#define SYNC_THREADS() __syncthreads ()
 #endif
 
 #ifdef IS_OPENCL
+#define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE)
 #endif
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index 7d27852d9..a8ce27fef 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -9,6 +9,7 @@
 #ifdef IS_CUDA
 DECLSPEC u32    atomic_dec      (u32 *p);
 DECLSPEC u32    atomic_inc      (u32 *p);
+DECLSPEC u32    atomic_or       (u32 *p, u32 val);
 DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 0e254c24a..867bb728e 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -7,7 +7,10 @@
 #define _INC_TYPES_H
 
 #ifdef IS_CUDA
-typedef unsigned char uchar;
+typedef unsigned char  uchar;
+typedef unsigned short ushort;
+typedef unsigned int   uint;
+typedef unsigned long  ulong;
 #endif
 
 #ifdef KERNEL_STATIC
@@ -62,17 +65,19 @@ typedef union vconv32
 
   struct
   {
-    u16 v16a;
-    u16 v16b;
-  };
+    u16 a;
+    u16 b;
+
+  } v16;
 
   struct
   {
-    u8 v8a;
-    u8 v8b;
-    u8 v8c;
-    u8 v8d;
-  };
+    u8 a;
+    u8 b;
+    u8 c;
+    u8 d;
+
+  } v8;
 
 } vconv32_t;
 
@@ -82,29 +87,32 @@ typedef union vconv64
 
   struct
   {
-    u32 v32a;
-    u32 v32b;
-  };
+    u32 a;
+    u32 b;
+
+  } v32;
 
   struct
   {
-    u16 v16a;
-    u16 v16b;
-    u16 v16c;
-    u16 v16d;
-  };
+    u16 a;
+    u16 b;
+    u16 c;
+    u16 d;
+
+  } v16;
 
   struct
   {
-    u8 v8a;
-    u8 v8b;
-    u8 v8c;
-    u8 v8d;
-    u8 v8e;
-    u8 v8f;
-    u8 v8g;
-    u8 v8h;
-  };
+    u8 a;
+    u8 b;
+    u8 c;
+    u8 d;
+    u8 e;
+    u8 f;
+    u8 g;
+    u8 h;
+
+  } v8;
 
 } vconv64_t;
 
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index b2bbd9037..f8def9bd2 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -103,15 +103,11 @@
 #if defined IS_CPU
 #define DECLSPEC inline
 #elif defined IS_GPU
-#if defined IS_CUDA
-#define DECLSPEC __device__
-#else
 #if defined IS_AMD
 #define DECLSPEC inline static
 #else
 #define DECLSPEC
 #endif
-#endif
 #else
 #define DECLSPEC
 #endif
diff --git a/include/backend.h b/include/backend.h
index 9b25a3496..7141288fe 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -22,11 +22,22 @@ static const char CL_VENDOR_MESA[]            = "Mesa";
 static const char CL_VENDOR_NV[]              = "NVIDIA Corporation";
 static const char CL_VENDOR_POCL[]            = "The pocl project";
 
-int  cuda_init  (hashcat_ctx_t *hashcat_ctx);
-void cuda_close (hashcat_ctx_t *hashcat_ctx);
+int  cuda_init   (hashcat_ctx_t *hashcat_ctx);
+void cuda_close  (hashcat_ctx_t *hashcat_ctx);
 
-int  ocl_init   (hashcat_ctx_t *hashcat_ctx);
-void ocl_close  (hashcat_ctx_t *hashcat_ctx);
+int  nvrtc_init  (hashcat_ctx_t *hashcat_ctx);
+void nvrtc_close (hashcat_ctx_t *hashcat_ctx);
+
+int  ocl_init    (hashcat_ctx_t *hashcat_ctx);
+void ocl_close   (hashcat_ctx_t *hashcat_ctx);
+
+int hc_nvrtcCreateProgram        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
+int hc_nvrtcDestroyProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
+int hc_nvrtcCompileProgram       (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
+int hc_nvrtcGetProgramLogSize    (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
+int hc_nvrtcGetProgramLog        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
+int hc_nvrtcGetPTXSize           (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
+int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/ext_nvrtc.h b/include/ext_nvrtc.h
index 407170c16..7bbbbd15a 100644
--- a/include/ext_nvrtc.h
+++ b/include/ext_nvrtc.h
@@ -50,15 +50,16 @@ typedef struct _nvrtcProgram *nvrtcProgram;
 
 #define NVRTC_API_CALL NVRTCAPI
 
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCADDNAMEEXPRESSION)  (nvrtcProgram, const char *);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCCOMPILEPROGRAM)     (nvrtcProgram, int, const char **);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCCREATEPROGRAM)      (nvrtcProgram *, const char *, const char *, int, const char **, const char **);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCDESTROYPROGRAM)     (nvrtcProgram *);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETLOWEREDNAME)     (nvrtcProgram, const char *, const char **);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPTX)             (nvrtcProgram, char *);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPTXSIZE)         (nvrtcProgram, size_t *);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOG)      (nvrtcProgram, char *);
-typedef nvrtcResult (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOGSIZE)  (nvrtcProgram, size_t *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCADDNAMEEXPRESSION)  (nvrtcProgram, const char * const);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCCOMPILEPROGRAM)     (nvrtcProgram, int, const char * const *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCCREATEPROGRAM)      (nvrtcProgram *, const char *, const char *, int, const char * const *, const char * const *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCDESTROYPROGRAM)     (nvrtcProgram *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCGETLOWEREDNAME)     (nvrtcProgram, const char * const, const char **);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCGETPTX)             (nvrtcProgram, char *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCGETPTXSIZE)         (nvrtcProgram, size_t *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOG)      (nvrtcProgram, char *);
+typedef nvrtcResult  (NVRTC_API_CALL *NVRTC_NVRTCGETPROGRAMLOGSIZE)  (nvrtcProgram, size_t *);
+typedef const char * (NVRTC_API_CALL *NVRTC_NVRTCGETERRORSTRING)     (nvrtcResult);
 
 typedef struct hc_nvrtc_lib
 {
@@ -73,9 +74,12 @@ typedef struct hc_nvrtc_lib
   NVRTC_NVRTCGETPTXSIZE         nvrtcGetPTXSize;
   NVRTC_NVRTCGETPROGRAMLOG      nvrtcGetProgramLog;
   NVRTC_NVRTCGETPROGRAMLOGSIZE  nvrtcGetProgramLogSize;
+  NVRTC_NVRTCGETERRORSTRING     nvrtcGetErrorString;
 
 } hc_nvrtc_lib_t;
 
 typedef hc_nvrtc_lib_t NVRTC_PTR;
 
+int nvrtc_make_options_array_from_string (char *string, char **options);
+
 #endif // _EXT_NVRTC_H
diff --git a/src/backend.c b/src/backend.c
index e7187c94d..efb118614 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -587,6 +587,7 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
   HC_LOAD_FUNC (nvrtc, nvrtcGetPTXSize,         NVRTC_NVRTCGETPTXSIZE,        NVRTC, 1);
   HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLog,      NVRTC_NVRTCGETPROGRAMLOG,     NVRTC, 1);
   HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLogSize,  NVRTC_NVRTCGETPROGRAMLOGSIZE, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetErrorString,     NVRTC_NVRTCGETERRORSTRING,    NVRTC, 1);
 
   return 0;
 }
@@ -831,6 +832,132 @@ void ocl_close (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
+int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames)
+{
+  // Forwards to the loaded nvrtcCreateProgram () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcCreateProgram (prog, src, name, numHeaders, headers, includeNames);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcCreateProgram(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog)
+{
+  // Forwards to the loaded nvrtcDestroyProgram () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcDestroyProgram (prog);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcDestroyProgram(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options)
+{
+  // Forwards to the loaded nvrtcCompileProgram () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcCompileProgram (prog, numOptions, options);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcCompileProgram(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet)
+{
+  // Forwards to the loaded nvrtcGetProgramLogSize () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcGetProgramLogSize (prog, logSizeRet);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcGetProgramLogSize(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log)
+{
+  // Forwards to the loaded nvrtcGetProgramLog () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcGetProgramLog (prog, log);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcGetProgramLog(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet)
+{
+  // Forwards to the loaded nvrtcGetPTXSize () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcGetPTXSize (prog, ptxSizeRet);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcGetPTXSize(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
+int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx)
+{
+  // Forwards to the loaded nvrtcGetPTX () entry point and maps its
+  // result: 0 on NVRTC_SUCCESS, otherwise the error text is logged and -1 returned.
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  const NVRTC_PTR *lib = backend_ctx->nvrtc;
+
+  const nvrtcResult status = lib->nvrtcGetPTX (prog, ptx);
+
+  if (status == NVRTC_SUCCESS) return 0;
+
+  event_log_error (hashcat_ctx, "nvrtcGetPTX(): %s", lib->nvrtcGetErrorString (status));
+
+  return -1;
+}
+
 int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -3180,7 +3307,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     ocl_close (hashcat_ctx);
   }
 
-
   /**
    * return if both CUDA and OpenCL initialization failed
    */
@@ -4911,6 +5037,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (vector_width > 16) vector_width = 16;
 
+    // CUDA currently supports only scalar types
+
+    if (backend_ctx->cuda)
+    {
+      vector_width = 1;
+    }
+
     device_param->vector_width = vector_width;
 
     /**
@@ -5349,65 +5482,145 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->device, build_options_module_buf, NULL, NULL);
-
-        //if (CL_rc == -1) return -1;
-
-        size_t build_log_size = 0;
-
-        hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
-
-        //if (CL_rc == -1) return -1;
-
-        #if defined (DEBUG)
-        if ((build_log_size > 1) || (CL_rc == -1))
-        #else
-        if (CL_rc == -1)
-        #endif
+        if (backend_ctx->nvrtc)
         {
-          char *build_log = (char *) hcmalloc (build_log_size + 1);
+          nvrtcProgram program;
 
-          int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+          const int rc_nvrtcCreateProgram = hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], "main_kernel", 0, NULL, NULL);
 
-          if (CL_rc_build == -1) return -1;
+          if (rc_nvrtcCreateProgram == -1) return -1;
 
-          puts (build_log);
+          char **nvrtc_options = (char **) hccalloc (1 + strlen (build_options_module_buf) + 1, sizeof (char *)); // ...
 
-          hcfree (build_log);
+          nvrtc_options[0] = "--device-as-default-execution-space";
+
+          char *nvrtc_options_string = hcstrdup (build_options_module_buf);
+
+          const int num_options = 1 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 1);
+
+          const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options);
+
+          size_t build_log_size = 0;
+
+          hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
+
+          #if defined (DEBUG)
+          if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1))
+          #else
+          if (rc_nvrtcCompileProgram == -1)
+          #endif
+          {
+            char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+            const int rc_nvrtcGetProgramLog = hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log);
+
+            if (rc_nvrtcGetProgramLog == -1) return -1;
+
+            puts (build_log);
+
+            hcfree (build_log);
+          }
+
+          if (rc_nvrtcCompileProgram == -1)
+          {
+            device_param->skipped_warning = true;
+
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+            continue;
+          }
+
+          hcfree (nvrtc_options);
+          hcfree (nvrtc_options_string);
+
+          if (cache_disable == false)
+          {
+            size_t binary_size;
+
+            const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
+
+            if (rc_nvrtcGetPTXSize == -1) return -1;
+
+            char *binary = (char *) hcmalloc (binary_size);
+
+            const int nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
+
+            if (nvrtcGetPTX == -1) return -1;
+
+            const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+            if (rc_write == false) return -1;
+
+            hcfree (binary);
+          }
+
+          const int rc_nvrtcDestroyProgram = hc_nvrtcDestroyProgram (hashcat_ctx, &program);
+
+          if (rc_nvrtcDestroyProgram == -1) return -1;
         }
 
-        if (CL_rc == -1)
+        if (1) // later just else
         {
-          device_param->skipped_warning = true;
-
-          event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
-
-          continue;
-        }
-
-        if (cache_disable == false)
-        {
-          size_t binary_size;
-
-          CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+          CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
 
           if (CL_rc == -1) return -1;
 
-          char *binary = (char *) hcmalloc (binary_size);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->device, build_options_module_buf, NULL, NULL);
 
-          CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+          //if (CL_rc == -1) return -1;
 
-          if (CL_rc == -1) return -1;
+          size_t build_log_size = 0;
 
-          const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
-          if (rc_write == false) return -1;
+          //if (CL_rc == -1) return -1;
 
-          hcfree (binary);
+          #if defined (DEBUG)
+          if ((build_log_size > 1) || (CL_rc == -1))
+          #else
+          if (CL_rc == -1)
+          #endif
+          {
+            char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+
+            if (CL_rc_build == -1) return -1;
+
+            puts (build_log);
+
+            hcfree (build_log);
+          }
+
+          if (CL_rc == -1)
+          {
+            device_param->skipped_warning = true;
+
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+            continue;
+          }
+
+          if (cache_disable == false)
+          {
+            size_t binary_size;
+
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            char *binary = (char *) hcmalloc (binary_size);
+
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+            if (rc_write == false) return -1;
+
+            hcfree (binary);
+          }
         }
       }
       else
diff --git a/src/convert.c b/src/convert.c
index c9426f617..bed989ae4 100644
--- a/src/convert.c
+++ b/src/convert.c
@@ -845,7 +845,7 @@ u8 v8a_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8a;
+  return v.v8.a;
 }
 
 u8 v8b_from_v32 (const u32 v32)
@@ -854,7 +854,7 @@ u8 v8b_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8b;
+  return v.v8.b;
 }
 
 u8 v8c_from_v32 (const u32 v32)
@@ -863,7 +863,7 @@ u8 v8c_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8c;
+  return v.v8.c;
 }
 
 u8 v8d_from_v32 (const u32 v32)
@@ -872,7 +872,7 @@ u8 v8d_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v8d;
+  return v.v8.d;
 }
 
 u16 v16a_from_v32 (const u32 v32)
@@ -881,7 +881,7 @@ u16 v16a_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v16a;
+  return v.v16.a;
 }
 
 u16 v16b_from_v32 (const u32 v32)
@@ -890,15 +890,15 @@ u16 v16b_from_v32 (const u32 v32)
 
   v.v32 = v32;
 
-  return v.v16b;
+  return v.v16.b;
 }
 
 u32 v32_from_v16ab (const u16 v16a, const u16 v16b)
 {
   vconv32_t v;
 
-  v.v16a = v16a;
-  v.v16b = v16b;
+  v.v16.a = v16a;
+  v.v16.b = v16b;
 
   return v.v32;
 }
@@ -909,7 +909,7 @@ u32 v32a_from_v64 (const u64 v64)
 
   v.v64 = v64;
 
-  return v.v32a;
+  return v.v32.a;
 }
 
 u32 v32b_from_v64 (const u64 v64)
@@ -918,15 +918,15 @@ u32 v32b_from_v64 (const u64 v64)
 
   v.v64 = v64;
 
-  return v.v32b;
+  return v.v32.b;
 }
 
 u64 v64_from_v32ab (const u32 v32a, const u32 v32b)
 {
   vconv64_t v;
 
-  v.v32a = v32a;
-  v.v32b = v32b;
+  v.v32.a = v32a;
+  v.v32.b = v32b;
 
   return v.v64;
 }
diff --git a/src/ext_nvrtc.c b/src/ext_nvrtc.c
index 17e6ff03b..634caac5d 100644
--- a/src/ext_nvrtc.c
+++ b/src/ext_nvrtc.c
@@ -6,3 +6,22 @@
 #include "common.h"
 #include "types.h"
 #include "ext_nvrtc.h"
+
+int nvrtc_make_options_array_from_string (char *string, char **options)
+{
+  // Splits string in place on ' ' (strtok_r overwrites separators), storing token pointers in options.
+  // Returns the number of tokens stored. options capacity is not known here; the caller must size it.
+  char *saveptr = NULL;
+  char *next    = strtok_r (string, " ", &saveptr);
+  int   cnt     = 0;
+
+  // No token at all (empty or only spaces): return 0 instead of storing a NULL entry and returning 1.
+  if (next == NULL) return cnt;
+
+  do
+  {
+    options[cnt++] = next;
+  } while ((next = strtok_r ((char *) NULL, " ", &saveptr)) != NULL);
+
+  return cnt;
+}
\ No newline at end of file

From 00e1e324920953a115081aa5dead266cac6f6a4d Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 13:34:07 +0200
Subject: [PATCH 04/73] Replace barrier() with SYNC_THREADS()

---
 OpenCL/m01100_a0-optimized.cl |  4 ++--
 OpenCL/m01100_a1-optimized.cl |  4 ++--
 OpenCL/m01100_a3-optimized.cl | 12 ++++++------
 OpenCL/m01500_a0-pure.cl      |  4 ++--
 OpenCL/m01500_a1-pure.cl      |  4 ++--
 OpenCL/m02500-pure.cl         |  2 +-
 OpenCL/m02501-pure.cl         |  2 +-
 OpenCL/m02610_a0-optimized.cl |  4 ++--
 OpenCL/m02610_a0-pure.cl      |  4 ++--
 OpenCL/m02610_a1-optimized.cl |  4 ++--
 OpenCL/m02610_a1-pure.cl      |  4 ++--
 OpenCL/m02610_a3-optimized.cl | 12 ++++++------
 OpenCL/m02610_a3-pure.cl      |  4 ++--
 OpenCL/m02710_a0-optimized.cl |  4 ++--
 OpenCL/m02710_a1-optimized.cl |  4 ++--
 OpenCL/m02710_a3-optimized.cl | 12 ++++++------
 OpenCL/m02810_a0-optimized.cl |  4 ++--
 OpenCL/m02810_a0-pure.cl      |  4 ++--
 OpenCL/m02810_a1-optimized.cl |  4 ++--
 OpenCL/m02810_a1-pure.cl      |  4 ++--
 OpenCL/m02810_a3-optimized.cl | 12 ++++++------
 OpenCL/m02810_a3-pure.cl      |  4 ++--
 OpenCL/m03000_a0-pure.cl      |  4 ++--
 OpenCL/m03000_a1-pure.cl      |  4 ++--
 OpenCL/m03100_a0-optimized.cl |  4 ++--
 OpenCL/m03100_a1-optimized.cl |  4 ++--
 OpenCL/m03100_a3-optimized.cl |  8 ++++----
 OpenCL/m03710_a0-optimized.cl |  4 ++--
 OpenCL/m03710_a0-pure.cl      |  4 ++--
 OpenCL/m03710_a1-optimized.cl |  4 ++--
 OpenCL/m03710_a1-pure.cl      |  4 ++--
 OpenCL/m03710_a3-optimized.cl | 12 ++++++------
 OpenCL/m03710_a3-pure.cl      |  4 ++--
 OpenCL/m03910_a0-optimized.cl |  4 ++--
 OpenCL/m03910_a0-pure.cl      |  4 ++--
 OpenCL/m03910_a1-optimized.cl |  4 ++--
 OpenCL/m03910_a1-pure.cl      |  4 ++--
 OpenCL/m03910_a3-optimized.cl | 12 ++++++------
 OpenCL/m03910_a3-pure.cl      |  4 ++--
 OpenCL/m04010_a0-optimized.cl |  4 ++--
 OpenCL/m04010_a0-pure.cl      |  4 ++--
 OpenCL/m04010_a1-optimized.cl |  4 ++--
 OpenCL/m04010_a1-pure.cl      |  4 ++--
 OpenCL/m04010_a3-optimized.cl | 12 ++++++------
 OpenCL/m04010_a3-pure.cl      |  4 ++--
 OpenCL/m04110_a0-optimized.cl |  4 ++--
 OpenCL/m04110_a0-pure.cl      |  4 ++--
 OpenCL/m04110_a1-optimized.cl |  4 ++--
 OpenCL/m04110_a1-pure.cl      |  4 ++--
 OpenCL/m04110_a3-optimized.cl | 12 ++++++------
 OpenCL/m04110_a3-pure.cl      |  4 ++--
 OpenCL/m04310_a0-optimized.cl |  4 ++--
 OpenCL/m04310_a0-pure.cl      |  4 ++--
 OpenCL/m04310_a1-optimized.cl |  4 ++--
 OpenCL/m04310_a1-pure.cl      |  4 ++--
 OpenCL/m04310_a3-optimized.cl | 12 ++++++------
 OpenCL/m04310_a3-pure.cl      |  4 ++--
 OpenCL/m04400_a0-optimized.cl |  4 ++--
 OpenCL/m04400_a0-pure.cl      |  4 ++--
 OpenCL/m04400_a1-optimized.cl |  4 ++--
 OpenCL/m04400_a1-pure.cl      |  4 ++--
 OpenCL/m04400_a3-optimized.cl | 12 ++++++------
 OpenCL/m04400_a3-pure.cl      |  4 ++--
 OpenCL/m04500_a0-optimized.cl |  4 ++--
 OpenCL/m04500_a0-pure.cl      |  4 ++--
 OpenCL/m04500_a1-optimized.cl |  4 ++--
 OpenCL/m04500_a1-pure.cl      |  4 ++--
 OpenCL/m04500_a3-optimized.cl | 12 ++++++------
 OpenCL/m04500_a3-pure.cl      |  4 ++--
 OpenCL/m04520_a0-optimized.cl |  4 ++--
 OpenCL/m04520_a0-pure.cl      |  4 ++--
 OpenCL/m04520_a1-optimized.cl |  4 ++--
 OpenCL/m04520_a1-pure.cl      |  4 ++--
 OpenCL/m04520_a3-optimized.cl | 12 ++++++------
 OpenCL/m04520_a3-pure.cl      |  4 ++--
 OpenCL/m04700_a0-optimized.cl |  4 ++--
 OpenCL/m04700_a0-pure.cl      |  4 ++--
 OpenCL/m04700_a1-optimized.cl |  4 ++--
 OpenCL/m04700_a1-pure.cl      |  4 ++--
 OpenCL/m04700_a3-optimized.cl | 12 ++++++------
 OpenCL/m04700_a3-pure.cl      |  4 ++--
 OpenCL/m05300_a0-optimized.cl |  4 ++--
 OpenCL/m05300_a1-optimized.cl |  4 ++--
 OpenCL/m05300_a3-optimized.cl | 12 ++++++------
 OpenCL/m05400_a0-optimized.cl |  4 ++--
 OpenCL/m05400_a1-optimized.cl |  4 ++--
 OpenCL/m05400_a3-optimized.cl | 12 ++++++------
 OpenCL/m05500_a0-optimized.cl |  4 ++--
 OpenCL/m05500_a0-pure.cl      |  4 ++--
 OpenCL/m05500_a1-optimized.cl |  4 ++--
 OpenCL/m05500_a1-pure.cl      |  4 ++--
 OpenCL/m05500_a3-optimized.cl | 12 ++++++------
 OpenCL/m05500_a3-pure.cl      |  4 ++--
 OpenCL/m05600_a0-optimized.cl |  4 ++--
 OpenCL/m05600_a1-optimized.cl |  4 ++--
 OpenCL/m05600_a3-optimized.cl | 12 ++++++------
 OpenCL/m05800-optimized.cl    |  2 +-
 OpenCL/m05800-pure.cl         |  2 +-
 OpenCL/m06100_a0-optimized.cl |  4 ++--
 OpenCL/m06100_a0-pure.cl      |  4 ++--
 OpenCL/m06100_a1-optimized.cl |  4 ++--
 OpenCL/m06100_a1-pure.cl      |  4 ++--
 OpenCL/m06100_a3-optimized.cl |  8 ++++----
 OpenCL/m06100_a3-pure.cl      |  4 ++--
 OpenCL/m06211-pure.cl         |  4 ++--
 OpenCL/m06212-pure.cl         |  4 ++--
 OpenCL/m06213-pure.cl         |  4 ++--
 OpenCL/m06221-pure.cl         |  4 ++--
 OpenCL/m06222-pure.cl         |  4 ++--
 OpenCL/m06223-pure.cl         |  4 ++--
 OpenCL/m06231-pure.cl         | 10 +++++-----
 OpenCL/m06232-pure.cl         | 10 +++++-----
 OpenCL/m06233-pure.cl         | 10 +++++-----
 OpenCL/m06600-pure.cl         |  2 +-
 OpenCL/m06800-pure.cl         |  2 +-
 OpenCL/m06900_a0-optimized.cl |  4 ++--
 OpenCL/m06900_a1-optimized.cl |  4 ++--
 OpenCL/m06900_a3-optimized.cl |  8 ++++----
 OpenCL/m08000_a0-optimized.cl |  8 ++++----
 OpenCL/m08000_a1-optimized.cl |  8 ++++----
 OpenCL/m08000_a3-optimized.cl |  8 ++++----
 OpenCL/m08400_a0-optimized.cl |  4 ++--
 OpenCL/m08400_a0-pure.cl      |  4 ++--
 OpenCL/m08400_a1-optimized.cl |  4 ++--
 OpenCL/m08400_a1-pure.cl      |  4 ++--
 OpenCL/m08400_a3-optimized.cl | 12 ++++++------
 OpenCL/m08400_a3-pure.cl      |  4 ++--
 OpenCL/m08500_a0-pure.cl      |  4 ++--
 OpenCL/m08500_a1-pure.cl      |  4 ++--
 OpenCL/m08500_a3-pure.cl      |  4 ++--
 OpenCL/m08600_a0-pure.cl      |  4 ++--
 OpenCL/m08600_a1-pure.cl      |  4 ++--
 OpenCL/m08600_a3-pure.cl      |  4 ++--
 OpenCL/m08700_a0-optimized.cl |  4 ++--
 OpenCL/m08700_a1-optimized.cl |  4 ++--
 OpenCL/m08700_a3-optimized.cl | 12 ++++++------
 OpenCL/m08800-pure.cl         |  2 +-
 OpenCL/m09100-pure.cl         |  2 +-
 OpenCL/m09400-pure.cl         |  2 +-
 OpenCL/m09500-pure.cl         |  2 +-
 OpenCL/m09600-pure.cl         |  2 +-
 OpenCL/m10700-optimized.cl    |  2 +-
 OpenCL/m10700-pure.cl         |  2 +-
 OpenCL/m11100_a0-optimized.cl |  4 ++--
 OpenCL/m11100_a0-pure.cl      |  4 ++--
 OpenCL/m11100_a1-optimized.cl |  4 ++--
 OpenCL/m11100_a1-pure.cl      |  4 ++--
 OpenCL/m11100_a3-optimized.cl | 12 ++++++------
 OpenCL/m11100_a3-pure.cl      |  4 ++--
 OpenCL/m11300-pure.cl         |  2 +-
 OpenCL/m11400_a0-pure.cl      |  4 ++--
 OpenCL/m11400_a1-pure.cl      |  4 ++--
 OpenCL/m11400_a3-pure.cl      |  4 ++--
 OpenCL/m11700_a0-optimized.cl |  4 ++--
 OpenCL/m11700_a0-pure.cl      |  4 ++--
 OpenCL/m11700_a1-optimized.cl |  4 ++--
 OpenCL/m11700_a1-pure.cl      |  4 ++--
 OpenCL/m11700_a3-optimized.cl | 12 ++++++------
 OpenCL/m11700_a3-pure.cl      |  4 ++--
 OpenCL/m11750_a0-pure.cl      |  4 ++--
 OpenCL/m11750_a1-pure.cl      |  4 ++--
 OpenCL/m11750_a3-pure.cl      |  4 ++--
 OpenCL/m11760_a0-pure.cl      |  4 ++--
 OpenCL/m11760_a1-pure.cl      |  4 ++--
 OpenCL/m11760_a3-pure.cl      |  4 ++--
 OpenCL/m11800_a0-optimized.cl |  4 ++--
 OpenCL/m11800_a0-pure.cl      |  4 ++--
 OpenCL/m11800_a1-optimized.cl |  4 ++--
 OpenCL/m11800_a1-pure.cl      |  4 ++--
 OpenCL/m11800_a3-optimized.cl | 12 ++++++------
 OpenCL/m11800_a3-pure.cl      |  4 ++--
 OpenCL/m11850_a0-pure.cl      |  4 ++--
 OpenCL/m11850_a1-pure.cl      |  4 ++--
 OpenCL/m11850_a3-pure.cl      |  4 ++--
 OpenCL/m11860_a0-pure.cl      |  4 ++--
 OpenCL/m11860_a1-pure.cl      |  4 ++--
 OpenCL/m11860_a3-pure.cl      |  4 ++--
 OpenCL/m12400-pure.cl         |  4 ++--
 OpenCL/m12500-pure.cl         |  2 +-
 OpenCL/m12600_a0-optimized.cl |  4 ++--
 OpenCL/m12600_a0-pure.cl      |  4 ++--
 OpenCL/m12600_a1-optimized.cl |  4 ++--
 OpenCL/m12600_a1-pure.cl      |  4 ++--
 OpenCL/m12600_a3-optimized.cl | 12 ++++++------
 OpenCL/m12600_a3-pure.cl      |  4 ++--
 OpenCL/m12700-pure.cl         |  2 +-
 OpenCL/m12800-pure.cl         |  2 +-
 OpenCL/m13200-pure.cl         |  2 +-
 OpenCL/m13400-pure.cl         |  4 ++--
 OpenCL/m13711-pure.cl         |  6 +++---
 OpenCL/m13712-pure.cl         |  6 +++---
 OpenCL/m13713-pure.cl         |  6 +++---
 OpenCL/m13721-pure.cl         |  6 +++---
 OpenCL/m13722-pure.cl         |  6 +++---
 OpenCL/m13723-pure.cl         |  6 +++---
 OpenCL/m13731-pure.cl         | 12 ++++++------
 OpenCL/m13732-pure.cl         | 12 ++++++------
 OpenCL/m13733-pure.cl         | 12 ++++++------
 OpenCL/m13751-pure.cl         |  6 +++---
 OpenCL/m13752-pure.cl         |  6 +++---
 OpenCL/m13753-pure.cl         |  6 +++---
 OpenCL/m13771-pure.cl         |  8 ++++----
 OpenCL/m13772-pure.cl         |  8 ++++----
 OpenCL/m13773-pure.cl         |  8 ++++----
 OpenCL/m13800_a0-optimized.cl |  4 ++--
 OpenCL/m13800_a1-optimized.cl |  4 ++--
 OpenCL/m13800_a3-optimized.cl | 12 ++++++------
 OpenCL/m13900_a0-optimized.cl |  4 ++--
 OpenCL/m13900_a0-pure.cl      |  4 ++--
 OpenCL/m13900_a1-optimized.cl |  4 ++--
 OpenCL/m13900_a1-pure.cl      |  4 ++--
 OpenCL/m13900_a3-optimized.cl | 12 ++++++------
 OpenCL/m13900_a3-pure.cl      |  4 ++--
 OpenCL/m14000_a0-pure.cl      |  4 ++--
 OpenCL/m14000_a1-pure.cl      |  4 ++--
 OpenCL/m14100_a0-pure.cl      |  4 ++--
 OpenCL/m14100_a1-pure.cl      |  4 ++--
 OpenCL/m14100_a3-pure.cl      |  4 ++--
 OpenCL/m14400_a0-optimized.cl |  4 ++--
 OpenCL/m14400_a0-pure.cl      |  4 ++--
 OpenCL/m14400_a1-optimized.cl |  4 ++--
 OpenCL/m14400_a1-pure.cl      |  4 ++--
 OpenCL/m14400_a3-optimized.cl | 12 ++++++------
 OpenCL/m14400_a3-pure.cl      |  4 ++--
 OpenCL/m14611-pure.cl         |  2 +-
 OpenCL/m14621-pure.cl         |  2 +-
 OpenCL/m14631-pure.cl         |  2 +-
 OpenCL/m14641-pure.cl         |  2 +-
 OpenCL/m14700-pure.cl         |  2 +-
 OpenCL/m14800-pure.cl         |  2 +-
 OpenCL/m14900_a0-optimized.cl |  4 ++--
 OpenCL/m14900_a1-optimized.cl |  4 ++--
 OpenCL/m14900_a3-optimized.cl |  4 ++--
 OpenCL/m15300-pure.cl         |  2 +-
 OpenCL/m15900-pure.cl         |  2 +-
 OpenCL/m16000_a0-pure.cl      |  4 ++--
 OpenCL/m16000_a1-pure.cl      |  4 ++--
 OpenCL/m16000_a3-pure.cl      |  4 ++--
 OpenCL/m16200-pure.cl         |  2 +-
 OpenCL/m16300-pure.cl         |  2 +-
 OpenCL/m16600_a0-optimized.cl |  4 ++--
 OpenCL/m16600_a0-pure.cl      |  4 ++--
 OpenCL/m16600_a1-optimized.cl |  4 ++--
 OpenCL/m16600_a1-pure.cl      |  4 ++--
 OpenCL/m16600_a3-optimized.cl | 12 ++++++------
 OpenCL/m16600_a3-pure.cl      |  4 ++--
 OpenCL/m18300-pure.cl         |  2 +-
 OpenCL/m18400-pure.cl         |  2 +-
 OpenCL/m18500_a0-pure.cl      |  4 ++--
 OpenCL/m18500_a1-pure.cl      |  4 ++--
 OpenCL/m18500_a3-pure.cl      |  4 ++--
 OpenCL/m18900-pure.cl         |  2 +-
 OpenCL/m19500_a0-pure.cl      |  4 ++--
 OpenCL/m19500_a1-pure.cl      |  4 ++--
 OpenCL/m19500_a3-pure.cl      |  4 ++--
 OpenCL/m19600-pure.cl         |  2 +-
 OpenCL/m19700-pure.cl         |  2 +-
 OpenCL/m19800-pure.cl         |  2 +-
 OpenCL/m19900-pure.cl         |  2 +-
 OpenCL/m20011-pure.cl         |  2 +-
 OpenCL/m20012-pure.cl         |  2 +-
 OpenCL/m20013-pure.cl         |  2 +-
 262 files changed, 642 insertions(+), 642 deletions(-)

diff --git a/OpenCL/m01100_a0-optimized.cl b/OpenCL/m01100_a0-optimized.cl
index bd0b99fe9..f31993bf2 100644
--- a/OpenCL/m01100_a0-optimized.cl
+++ b/OpenCL/m01100_a0-optimized.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_RULES ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -270,7 +270,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_RULES ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m01100_a1-optimized.cl b/OpenCL/m01100_a1-optimized.cl
index 64a69d4b1..a63c340d1 100644
--- a/OpenCL/m01100_a1-optimized.cl
+++ b/OpenCL/m01100_a1-optimized.cl
@@ -49,7 +49,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_BASIC ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -330,7 +330,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_BASIC ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m01100_a3-optimized.cl b/OpenCL/m01100_a3-optimized.cl
index 73d1b826d..57e255107 100644
--- a/OpenCL/m01100_a3-optimized.cl
+++ b/OpenCL/m01100_a3-optimized.cl
@@ -541,7 +541,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -595,7 +595,7 @@ KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -649,7 +649,7 @@ KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -703,7 +703,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -757,7 +757,7 @@ KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -811,7 +811,7 @@ KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ())
     s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl
index f3a4d5913..a1d94d749 100644
--- a/OpenCL/m01500_a0-pure.cl
+++ b/OpenCL/m01500_a0-pure.cl
@@ -519,7 +519,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -603,7 +603,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl
index 88a06b749..81b0a22cb 100644
--- a/OpenCL/m01500_a1-pure.cl
+++ b/OpenCL/m01500_a1-pure.cl
@@ -517,7 +517,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -680,7 +680,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index 8066d3298..bb46357f5 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -682,7 +682,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
   #ifdef IS_CUDA
   __syncthreads();
   #else
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
   #endif
 
   #else
diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl
index 6da60615e..f51e8f2db 100644
--- a/OpenCL/m02501-pure.cl
+++ b/OpenCL/m02501-pure.cl
@@ -549,7 +549,7 @@ KERNEL_FQ void m02501_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl
index df8899488..6744a5026 100644
--- a/OpenCL/m02610_a0-optimized.cl
+++ b/OpenCL/m02610_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -336,7 +336,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl
index 5c209f995..092557282 100644
--- a/OpenCL/m02610_a0-pure.cl
+++ b/OpenCL/m02610_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -155,7 +155,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl
index ad323f53e..bca78bc86 100644
--- a/OpenCL/m02610_a1-optimized.cl
+++ b/OpenCL/m02610_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -393,7 +393,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl
index 46f3808bb..ba64a81e5 100644
--- a/OpenCL/m02610_a1-pure.cl
+++ b/OpenCL/m02610_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -151,7 +151,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl
index 73af945f8..482e34094 100644
--- a/OpenCL/m02610_a3-optimized.cl
+++ b/OpenCL/m02610_a3-optimized.cl
@@ -616,7 +616,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -686,7 +686,7 @@ KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -756,7 +756,7 @@ KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -826,7 +826,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -896,7 +896,7 @@ KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -966,7 +966,7 @@ KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl
index 842897b54..6f992a8a5 100644
--- a/OpenCL/m02610_a3-pure.cl
+++ b/OpenCL/m02610_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -164,7 +164,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl
index cfe83ae2d..74c7190f8 100644
--- a/OpenCL/m02710_a0-optimized.cl
+++ b/OpenCL/m02710_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -421,7 +421,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl
index 89927efc9..45595e2d6 100644
--- a/OpenCL/m02710_a1-optimized.cl
+++ b/OpenCL/m02710_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -478,7 +478,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl
index a02f67260..c37eb27e9 100644
--- a/OpenCL/m02710_a3-optimized.cl
+++ b/OpenCL/m02710_a3-optimized.cl
@@ -785,7 +785,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -855,7 +855,7 @@ KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -925,7 +925,7 @@ KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -995,7 +995,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1065,7 +1065,7 @@ KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1135,7 +1135,7 @@ KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl
index 25757ed87..3c5094284 100644
--- a/OpenCL/m02810_a0-optimized.cl
+++ b/OpenCL/m02810_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -420,7 +420,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl
index 5e22e1b3b..c101ba360 100644
--- a/OpenCL/m02810_a0-pure.cl
+++ b/OpenCL/m02810_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -178,7 +178,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl
index 081e75270..d173d4832 100644
--- a/OpenCL/m02810_a1-optimized.cl
+++ b/OpenCL/m02810_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -477,7 +477,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl
index 187fe0dfc..31cb8de8c 100644
--- a/OpenCL/m02810_a1-pure.cl
+++ b/OpenCL/m02810_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -174,7 +174,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl
index 1f80d06f3..0fb3dd780 100644
--- a/OpenCL/m02810_a3-optimized.cl
+++ b/OpenCL/m02810_a3-optimized.cl
@@ -783,7 +783,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -853,7 +853,7 @@ KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -923,7 +923,7 @@ KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -993,7 +993,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1063,7 +1063,7 @@ KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1133,7 +1133,7 @@ KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl
index 0af147c6b..903954989 100644
--- a/OpenCL/m02810_a3-pure.cl
+++ b/OpenCL/m02810_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -187,7 +187,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl
index 531d294e9..c35d938de 100644
--- a/OpenCL/m03000_a0-pure.cl
+++ b/OpenCL/m03000_a0-pure.cl
@@ -529,7 +529,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -614,7 +614,7 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl
index 0e00a37ef..ffadf5214 100644
--- a/OpenCL/m03000_a1-pure.cl
+++ b/OpenCL/m03000_a1-pure.cl
@@ -527,7 +527,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -691,7 +691,7 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03100_a0-optimized.cl b/OpenCL/m03100_a0-optimized.cl
index 426594c63..d37a1db37 100644
--- a/OpenCL/m03100_a0-optimized.cl
+++ b/OpenCL/m03100_a0-optimized.cl
@@ -56,7 +56,7 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -288,7 +288,7 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m03100_a1-optimized.cl b/OpenCL/m03100_a1-optimized.cl
index d565e1530..601c1c62b 100644
--- a/OpenCL/m03100_a1-optimized.cl
+++ b/OpenCL/m03100_a1-optimized.cl
@@ -54,7 +54,7 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -346,7 +346,7 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl
index 41fe2f1f5..fb8618271 100644
--- a/OpenCL/m03100_a3-optimized.cl
+++ b/OpenCL/m03100_a3-optimized.cl
@@ -452,7 +452,7 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -535,7 +535,7 @@ KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -622,7 +622,7 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -705,7 +705,7 @@ KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl
index 13f534ec4..1956e8eaf 100644
--- a/OpenCL/m03710_a0-optimized.cl
+++ b/OpenCL/m03710_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -367,7 +367,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl
index 600e2bfbd..a5ac06721 100644
--- a/OpenCL/m03710_a0-pure.cl
+++ b/OpenCL/m03710_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -168,7 +168,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl
index 6a09adc8d..885e12019 100644
--- a/OpenCL/m03710_a1-optimized.cl
+++ b/OpenCL/m03710_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -424,7 +424,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl
index 334a09bdb..48804bdc0 100644
--- a/OpenCL/m03710_a1-pure.cl
+++ b/OpenCL/m03710_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -164,7 +164,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl
index 6feff2427..6a92fe84a 100644
--- a/OpenCL/m03710_a3-optimized.cl
+++ b/OpenCL/m03710_a3-optimized.cl
@@ -643,7 +643,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -747,7 +747,7 @@ KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -817,7 +817,7 @@ KERNEL_FQ void m03710_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -887,7 +887,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -957,7 +957,7 @@ KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -993,7 +993,7 @@ KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl
index b9c61f98f..0583ba6e0 100644
--- a/OpenCL/m03710_a3-pure.cl
+++ b/OpenCL/m03710_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -177,7 +177,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl
index 38346dea8..38586a366 100644
--- a/OpenCL/m03910_a0-optimized.cl
+++ b/OpenCL/m03910_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -420,7 +420,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl
index 9cebaca44..79fdb2265 100644
--- a/OpenCL/m03910_a0-pure.cl
+++ b/OpenCL/m03910_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -178,7 +178,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl
index d27fbdd63..c4b2f5cb2 100644
--- a/OpenCL/m03910_a1-optimized.cl
+++ b/OpenCL/m03910_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -477,7 +477,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl
index 3c3706a99..a27aa1fa4 100644
--- a/OpenCL/m03910_a1-pure.cl
+++ b/OpenCL/m03910_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -174,7 +174,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl
index aeacff0e7..3134cd506 100644
--- a/OpenCL/m03910_a3-optimized.cl
+++ b/OpenCL/m03910_a3-optimized.cl
@@ -783,7 +783,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -853,7 +853,7 @@ KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -923,7 +923,7 @@ KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -993,7 +993,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1063,7 +1063,7 @@ KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1133,7 +1133,7 @@ KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl
index b4c269e4c..73698ba4e 100644
--- a/OpenCL/m03910_a3-pure.cl
+++ b/OpenCL/m03910_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -187,7 +187,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl
index efe2e8074..8f675941a 100644
--- a/OpenCL/m04010_a0-optimized.cl
+++ b/OpenCL/m04010_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -393,7 +393,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl
index 0eab0a63b..6557df376 100644
--- a/OpenCL/m04010_a0-pure.cl
+++ b/OpenCL/m04010_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -159,7 +159,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl
index 2d9d5ac11..22f5e9830 100644
--- a/OpenCL/m04010_a1-optimized.cl
+++ b/OpenCL/m04010_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -449,7 +449,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl
index 66d5c9f55..935273eb0 100644
--- a/OpenCL/m04010_a1-pure.cl
+++ b/OpenCL/m04010_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -155,7 +155,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl
index 34d678766..4d69d9c25 100644
--- a/OpenCL/m04010_a3-optimized.cl
+++ b/OpenCL/m04010_a3-optimized.cl
@@ -683,7 +683,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -787,7 +787,7 @@ KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -857,7 +857,7 @@ KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -927,7 +927,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -997,7 +997,7 @@ KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1033,7 +1033,7 @@ KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl
index 2f6882a17..3bc48a738 100644
--- a/OpenCL/m04010_a3-pure.cl
+++ b/OpenCL/m04010_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -172,7 +172,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl
index 77d12bc03..972d0113f 100644
--- a/OpenCL/m04110_a0-optimized.cl
+++ b/OpenCL/m04110_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -438,7 +438,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl
index 8347442e3..2426e5a16 100644
--- a/OpenCL/m04110_a0-pure.cl
+++ b/OpenCL/m04110_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -172,7 +172,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl
index 36bb2b2db..af3cb9274 100644
--- a/OpenCL/m04110_a1-optimized.cl
+++ b/OpenCL/m04110_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -496,7 +496,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl
index 9d9273f2a..dd230f3fe 100644
--- a/OpenCL/m04110_a1-pure.cl
+++ b/OpenCL/m04110_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -168,7 +168,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl
index c9dd1fcc5..3521ecb6d 100644
--- a/OpenCL/m04110_a3-optimized.cl
+++ b/OpenCL/m04110_a3-optimized.cl
@@ -739,7 +739,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -843,7 +843,7 @@ KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -913,7 +913,7 @@ KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -983,7 +983,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1053,7 +1053,7 @@ KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1089,7 +1089,7 @@ KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl
index 4c74ccbd2..e7c4bfbd0 100644
--- a/OpenCL/m04110_a3-pure.cl
+++ b/OpenCL/m04110_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -183,7 +183,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl
index dc693f19c..33bf2d335 100644
--- a/OpenCL/m04310_a0-optimized.cl
+++ b/OpenCL/m04310_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -336,7 +336,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl
index 0a0230892..cec409c1d 100644
--- a/OpenCL/m04310_a0-pure.cl
+++ b/OpenCL/m04310_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -155,7 +155,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl
index cc0b41619..bbd0c9508 100644
--- a/OpenCL/m04310_a1-optimized.cl
+++ b/OpenCL/m04310_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -393,7 +393,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl
index 2ce1d1fa7..b0acaf470 100644
--- a/OpenCL/m04310_a1-pure.cl
+++ b/OpenCL/m04310_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -151,7 +151,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl
index 159bf0f4a..6ecf6b1b3 100644
--- a/OpenCL/m04310_a3-optimized.cl
+++ b/OpenCL/m04310_a3-optimized.cl
@@ -616,7 +616,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -686,7 +686,7 @@ KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -756,7 +756,7 @@ KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -826,7 +826,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -896,7 +896,7 @@ KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -966,7 +966,7 @@ KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl
index f291aecb6..d40944557 100644
--- a/OpenCL/m04310_a3-pure.cl
+++ b/OpenCL/m04310_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -164,7 +164,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl
index ffad3ae09..05e04b25a 100644
--- a/OpenCL/m04400_a0-optimized.cl
+++ b/OpenCL/m04400_a0-optimized.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -363,7 +363,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a0-pure.cl b/OpenCL/m04400_a0-pure.cl
index 72718568e..dc63016e7 100644
--- a/OpenCL/m04400_a0-pure.cl
+++ b/OpenCL/m04400_a0-pure.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -150,7 +150,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl
index 795178874..698ef9d86 100644
--- a/OpenCL/m04400_a1-optimized.cl
+++ b/OpenCL/m04400_a1-optimized.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -419,7 +419,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl
index 9404b95fc..73df68e7b 100644
--- a/OpenCL/m04400_a1-pure.cl
+++ b/OpenCL/m04400_a1-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -146,7 +146,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl
index 33224fc61..220db208f 100644
--- a/OpenCL/m04400_a3-optimized.cl
+++ b/OpenCL/m04400_a3-optimized.cl
@@ -588,7 +588,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -658,7 +658,7 @@ KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -728,7 +728,7 @@ KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -798,7 +798,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -868,7 +868,7 @@ KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -938,7 +938,7 @@ KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl
index 213a956ef..5cdee2e1a 100644
--- a/OpenCL/m04400_a3-pure.cl
+++ b/OpenCL/m04400_a3-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -159,7 +159,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl
index 573e50223..aa6785d5c 100644
--- a/OpenCL/m04500_a0-optimized.cl
+++ b/OpenCL/m04500_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -389,7 +389,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl
index 1dfa8b061..b8a539961 100644
--- a/OpenCL/m04500_a0-pure.cl
+++ b/OpenCL/m04500_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -149,7 +149,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl
index 05d593d27..e9e0c42f9 100644
--- a/OpenCL/m04500_a1-optimized.cl
+++ b/OpenCL/m04500_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -445,7 +445,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl
index 32db391dd..eead59928 100644
--- a/OpenCL/m04500_a1-pure.cl
+++ b/OpenCL/m04500_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -145,7 +145,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl
index 1be7a4321..a0f9f632f 100644
--- a/OpenCL/m04500_a3-optimized.cl
+++ b/OpenCL/m04500_a3-optimized.cl
@@ -647,7 +647,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -717,7 +717,7 @@ KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -787,7 +787,7 @@ KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -857,7 +857,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -927,7 +927,7 @@ KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -997,7 +997,7 @@ KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl
index 086e72121..82173d7a4 100644
--- a/OpenCL/m04500_a3-pure.cl
+++ b/OpenCL/m04500_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -158,7 +158,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl
index a34bca23c..237702cc0 100644
--- a/OpenCL/m04520_a0-optimized.cl
+++ b/OpenCL/m04520_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -620,7 +620,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl
index b53826e38..5cd66b395 100644
--- a/OpenCL/m04520_a0-pure.cl
+++ b/OpenCL/m04520_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -164,7 +164,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl
index 955343e5c..2c6cf6a41 100644
--- a/OpenCL/m04520_a1-optimized.cl
+++ b/OpenCL/m04520_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -676,7 +676,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl
index 54179adab..4a34801fc 100644
--- a/OpenCL/m04520_a1-pure.cl
+++ b/OpenCL/m04520_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -160,7 +160,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl
index c3d1a96d3..52e229952 100644
--- a/OpenCL/m04520_a3-optimized.cl
+++ b/OpenCL/m04520_a3-optimized.cl
@@ -1100,7 +1100,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1170,7 +1170,7 @@ KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1240,7 +1240,7 @@ KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1310,7 +1310,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1380,7 +1380,7 @@ KERNEL_FQ void m04520_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1450,7 +1450,7 @@ KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl
index c1e49123c..8ced12d82 100644
--- a/OpenCL/m04520_a3-pure.cl
+++ b/OpenCL/m04520_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -175,7 +175,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl
index 13a2b98c8..19ffea0a4 100644
--- a/OpenCL/m04700_a0-optimized.cl
+++ b/OpenCL/m04700_a0-optimized.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -346,7 +346,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl
index 056a4bab3..3645fa853 100644
--- a/OpenCL/m04700_a0-pure.cl
+++ b/OpenCL/m04700_a0-pure.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -145,7 +145,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl
index 5f590df8c..982e57043 100644
--- a/OpenCL/m04700_a1-optimized.cl
+++ b/OpenCL/m04700_a1-optimized.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -399,7 +399,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl
index 357bf7a78..ddc4286ed 100644
--- a/OpenCL/m04700_a1-pure.cl
+++ b/OpenCL/m04700_a1-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -141,7 +141,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl
index d1bf72589..e2c1ab949 100644
--- a/OpenCL/m04700_a3-optimized.cl
+++ b/OpenCL/m04700_a3-optimized.cl
@@ -588,7 +588,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -658,7 +658,7 @@ KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -728,7 +728,7 @@ KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -798,7 +798,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -868,7 +868,7 @@ KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -938,7 +938,7 @@ KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl
index 29ca21e72..26a7524be 100644
--- a/OpenCL/m04700_a3-pure.cl
+++ b/OpenCL/m04700_a3-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -154,7 +154,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05300_a0-optimized.cl b/OpenCL/m05300_a0-optimized.cl
index 11a175095..f20d0d097 100644
--- a/OpenCL/m05300_a0-optimized.cl
+++ b/OpenCL/m05300_a0-optimized.cl
@@ -138,7 +138,7 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -313,7 +313,7 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05300_a1-optimized.cl b/OpenCL/m05300_a1-optimized.cl
index 290f60a35..fc2f75c04 100644
--- a/OpenCL/m05300_a1-optimized.cl
+++ b/OpenCL/m05300_a1-optimized.cl
@@ -136,7 +136,7 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -371,7 +371,7 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl
index c228edc09..1f961d197 100644
--- a/OpenCL/m05300_a3-optimized.cl
+++ b/OpenCL/m05300_a3-optimized.cl
@@ -442,7 +442,7 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -515,7 +515,7 @@ KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -588,7 +588,7 @@ KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -661,7 +661,7 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -734,7 +734,7 @@ KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -807,7 +807,7 @@ KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05400_a0-optimized.cl b/OpenCL/m05400_a0-optimized.cl
index 06cfe4d95..c2332db9e 100644
--- a/OpenCL/m05400_a0-optimized.cl
+++ b/OpenCL/m05400_a0-optimized.cl
@@ -142,7 +142,7 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -326,7 +326,7 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05400_a1-optimized.cl b/OpenCL/m05400_a1-optimized.cl
index 576f280d4..9ca6763e1 100644
--- a/OpenCL/m05400_a1-optimized.cl
+++ b/OpenCL/m05400_a1-optimized.cl
@@ -140,7 +140,7 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -392,7 +392,7 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05400_a3-optimized.cl b/OpenCL/m05400_a3-optimized.cl
index 21d723531..6a1b47117 100644
--- a/OpenCL/m05400_a3-optimized.cl
+++ b/OpenCL/m05400_a3-optimized.cl
@@ -446,7 +446,7 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -519,7 +519,7 @@ KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -592,7 +592,7 @@ KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -665,7 +665,7 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -738,7 +738,7 @@ KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -811,7 +811,7 @@ KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t))
     s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]);
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl
index 7db6100e7..7a3a58037 100644
--- a/OpenCL/m05500_a0-optimized.cl
+++ b/OpenCL/m05500_a0-optimized.cl
@@ -539,7 +539,7 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -752,7 +752,7 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl
index f83be3388..f13503698 100644
--- a/OpenCL/m05500_a0-pure.cl
+++ b/OpenCL/m05500_a0-pure.cl
@@ -539,7 +539,7 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -663,7 +663,7 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl
index 20de5bc7e..bd80ae13e 100644
--- a/OpenCL/m05500_a1-optimized.cl
+++ b/OpenCL/m05500_a1-optimized.cl
@@ -537,7 +537,7 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -803,7 +803,7 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl
index 14e152681..577117f3a 100644
--- a/OpenCL/m05500_a1-pure.cl
+++ b/OpenCL/m05500_a1-pure.cl
@@ -537,7 +537,7 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -659,7 +659,7 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl
index 5f43cd563..b1a34c2c0 100644
--- a/OpenCL/m05500_a3-optimized.cl
+++ b/OpenCL/m05500_a3-optimized.cl
@@ -870,7 +870,7 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -944,7 +944,7 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1018,7 +1018,7 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1092,7 +1092,7 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1166,7 +1166,7 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1240,7 +1240,7 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl
index c4aa11718..698003a40 100644
--- a/OpenCL/m05500_a3-pure.cl
+++ b/OpenCL/m05500_a3-pure.cl
@@ -537,7 +537,7 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -672,7 +672,7 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05600_a0-optimized.cl b/OpenCL/m05600_a0-optimized.cl
index a1cb6f71d..1ee105135 100644
--- a/OpenCL/m05600_a0-optimized.cl
+++ b/OpenCL/m05600_a0-optimized.cl
@@ -141,7 +141,7 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_RULES_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -380,7 +380,7 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_RULES_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05600_a1-optimized.cl b/OpenCL/m05600_a1-optimized.cl
index 16af9bf75..c8d7bdf11 100644
--- a/OpenCL/m05600_a1-optimized.cl
+++ b/OpenCL/m05600_a1-optimized.cl
@@ -139,7 +139,7 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -436,7 +436,7 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl
index ba42483e9..646e07455 100644
--- a/OpenCL/m05600_a3-optimized.cl
+++ b/OpenCL/m05600_a3-optimized.cl
@@ -563,7 +563,7 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -636,7 +636,7 @@ KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -709,7 +709,7 @@ KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -782,7 +782,7 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -855,7 +855,7 @@ KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -928,7 +928,7 @@ KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t))
     s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index 339376659..bedf6625e 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -2308,7 +2308,7 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t))
     s_pc_len[i] = c_pc_len[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl
index a397ae119..465e47176 100644
--- a/OpenCL/m05800-pure.cl
+++ b/OpenCL/m05800-pure.cl
@@ -2128,7 +2128,7 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t))
     s_pc_len[i] = c_pc_len[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl
index 046d0c4f9..00243058c 100644
--- a/OpenCL/m06100_a0-optimized.cl
+++ b/OpenCL/m06100_a0-optimized.cl
@@ -60,7 +60,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -200,7 +200,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl
index deca7a8ba..4482625ca 100644
--- a/OpenCL/m06100_a0-pure.cl
+++ b/OpenCL/m06100_a0-pure.cl
@@ -55,7 +55,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -139,7 +139,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl
index f81455869..e6cb58a41 100644
--- a/OpenCL/m06100_a1-optimized.cl
+++ b/OpenCL/m06100_a1-optimized.cl
@@ -58,7 +58,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -256,7 +256,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl
index 611927094..aea3847fb 100644
--- a/OpenCL/m06100_a1-pure.cl
+++ b/OpenCL/m06100_a1-pure.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -135,7 +135,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl
index da2d8745e..b9e10a98f 100644
--- a/OpenCL/m06100_a3-optimized.cl
+++ b/OpenCL/m06100_a3-optimized.cl
@@ -208,7 +208,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -300,7 +300,7 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -396,7 +396,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -488,7 +488,7 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl
index 3f2f65bfd..2f5bedfb1 100644
--- a/OpenCL/m06100_a3-pure.cl
+++ b/OpenCL/m06100_a3-pure.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -148,7 +148,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ())
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl
index 529049401..7e84978c8 100644
--- a/OpenCL/m06211-pure.cl
+++ b/OpenCL/m06211-pure.cl
@@ -99,7 +99,7 @@ KERNEL_FQ void m06211_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -331,7 +331,7 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl
index f6228daea..a60a315ee 100644
--- a/OpenCL/m06212-pure.cl
+++ b/OpenCL/m06212-pure.cl
@@ -99,7 +99,7 @@ KERNEL_FQ void m06212_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -331,7 +331,7 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl
index 1bd79a274..45b304de7 100644
--- a/OpenCL/m06213-pure.cl
+++ b/OpenCL/m06213-pure.cl
@@ -99,7 +99,7 @@ KERNEL_FQ void m06213_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -331,7 +331,7 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl
index 75e6a556e..0e8dc1e5f 100644
--- a/OpenCL/m06221-pure.cl
+++ b/OpenCL/m06221-pure.cl
@@ -121,7 +121,7 @@ KERNEL_FQ void m06221_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -475,7 +475,7 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl
index f39eba2a5..2af5c856f 100644
--- a/OpenCL/m06222-pure.cl
+++ b/OpenCL/m06222-pure.cl
@@ -121,7 +121,7 @@ KERNEL_FQ void m06222_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -475,7 +475,7 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl
index 2c2468f59..3721447bc 100644
--- a/OpenCL/m06223-pure.cl
+++ b/OpenCL/m06223-pure.cl
@@ -121,7 +121,7 @@ KERNEL_FQ void m06223_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -475,7 +475,7 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl
index 57fc6cdcc..57d84f57c 100644
--- a/OpenCL/m06231-pure.cl
+++ b/OpenCL/m06231-pure.cl
@@ -159,7 +159,7 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -191,7 +191,7 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -403,7 +403,7 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -609,7 +609,7 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -657,7 +657,7 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl
index 9634349d5..b369aa7cb 100644
--- a/OpenCL/m06232-pure.cl
+++ b/OpenCL/m06232-pure.cl
@@ -159,7 +159,7 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -191,7 +191,7 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -403,7 +403,7 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -609,7 +609,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -657,7 +657,7 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl
index 7e4066daa..a1b94ae89 100644
--- a/OpenCL/m06233-pure.cl
+++ b/OpenCL/m06233-pure.cl
@@ -159,7 +159,7 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -191,7 +191,7 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -403,7 +403,7 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -609,7 +609,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -657,7 +657,7 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06600-pure.cl b/OpenCL/m06600-pure.cl
index b84adff7b..245877c4e 100644
--- a/OpenCL/m06600-pure.cl
+++ b/OpenCL/m06600-pure.cl
@@ -267,7 +267,7 @@ KERNEL_FQ void m06600_comp (KERN_ATTR_TMPS (agilekey_tmp_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl
index 2bf55cd0d..5b78bfc50 100644
--- a/OpenCL/m06800-pure.cl
+++ b/OpenCL/m06800-pure.cl
@@ -298,7 +298,7 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl
index d699856c0..3274d0665 100644
--- a/OpenCL/m06900_a0-optimized.cl
+++ b/OpenCL/m06900_a0-optimized.cl
@@ -721,7 +721,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_RULES ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -937,7 +937,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_RULES ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl
index a6ca9bb5d..156b85b2f 100644
--- a/OpenCL/m06900_a1-optimized.cl
+++ b/OpenCL/m06900_a1-optimized.cl
@@ -719,7 +719,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -989,7 +989,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl
index 5d9996777..d68d73cc8 100644
--- a/OpenCL/m06900_a3-optimized.cl
+++ b/OpenCL/m06900_a3-optimized.cl
@@ -1079,7 +1079,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1148,7 +1148,7 @@ KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1221,7 +1221,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1290,7 +1290,7 @@ KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ())
     s_tables[3][i] = c_tables[3][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl
index c3f3e6edf..6057ef561 100644
--- a/OpenCL/m08000_a0-optimized.cl
+++ b/OpenCL/m08000_a0-optimized.cl
@@ -236,7 +236,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_RULES ())
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -264,7 +264,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_RULES ())
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -407,7 +407,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_RULES ())
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -435,7 +435,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_RULES ())
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl
index defb4c2f0..3f597c2f2 100644
--- a/OpenCL/m08000_a1-optimized.cl
+++ b/OpenCL/m08000_a1-optimized.cl
@@ -234,7 +234,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_BASIC ())
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -262,7 +262,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_BASIC ())
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -459,7 +459,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_BASIC ())
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -487,7 +487,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_BASIC ())
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl
index 1439bce9e..0209e9805 100644
--- a/OpenCL/m08000_a3-optimized.cl
+++ b/OpenCL/m08000_a3-optimized.cl
@@ -231,7 +231,7 @@ DECLSPEC void m08000m (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -259,7 +259,7 @@ DECLSPEC void m08000m (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -353,7 +353,7 @@ DECLSPEC void m08000s (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32
     w_s2[i] = 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (lid == 0)
   {
@@ -381,7 +381,7 @@ DECLSPEC void m08000s (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32
     }
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl
index 21a973218..6ca791374 100644
--- a/OpenCL/m08400_a0-optimized.cl
+++ b/OpenCL/m08400_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -306,7 +306,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl
index eb3ac3cae..cd55c0d83 100644
--- a/OpenCL/m08400_a0-pure.cl
+++ b/OpenCL/m08400_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -203,7 +203,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl
index 77c9a78e9..37a2189cd 100644
--- a/OpenCL/m08400_a1-optimized.cl
+++ b/OpenCL/m08400_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -362,7 +362,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl
index 67e3cb552..5d994aab7 100644
--- a/OpenCL/m08400_a1-pure.cl
+++ b/OpenCL/m08400_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -199,7 +199,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl
index ba99799e7..23474987f 100644
--- a/OpenCL/m08400_a3-optimized.cl
+++ b/OpenCL/m08400_a3-optimized.cl
@@ -482,7 +482,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -552,7 +552,7 @@ KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -622,7 +622,7 @@ KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -692,7 +692,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -762,7 +762,7 @@ KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -832,7 +832,7 @@ KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl
index 47847863c..1d495ade1 100644
--- a/OpenCL/m08400_a3-pure.cl
+++ b/OpenCL/m08400_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -216,7 +216,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl
index 04cefa798..fcb84c9b0 100644
--- a/OpenCL/m08500_a0-pure.cl
+++ b/OpenCL/m08500_a0-pure.cl
@@ -559,7 +559,7 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -657,7 +657,7 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl
index d7b02244b..6eb3590b9 100644
--- a/OpenCL/m08500_a1-pure.cl
+++ b/OpenCL/m08500_a1-pure.cl
@@ -557,7 +557,7 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -713,7 +713,7 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl
index 8ae9721c5..d50e4174d 100644
--- a/OpenCL/m08500_a3-pure.cl
+++ b/OpenCL/m08500_a3-pure.cl
@@ -695,7 +695,7 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -769,7 +769,7 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl
index c88b30787..621f90fc3 100644
--- a/OpenCL/m08600_a0-pure.cl
+++ b/OpenCL/m08600_a0-pure.cl
@@ -250,7 +250,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_RULES ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -314,7 +314,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_RULES ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl
index 5253d21dd..89ef057c8 100644
--- a/OpenCL/m08600_a1-pure.cl
+++ b/OpenCL/m08600_a1-pure.cl
@@ -248,7 +248,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_BASIC ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -372,7 +372,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_BASIC ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl
index cd596b5c0..3b579ac55 100644
--- a/OpenCL/m08600_a3-pure.cl
+++ b/OpenCL/m08600_a3-pure.cl
@@ -353,7 +353,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_VECTOR ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -410,7 +410,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ())
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl
index 6cb6a3b9f..bb18dbed7 100644
--- a/OpenCL/m08700_a0-optimized.cl
+++ b/OpenCL/m08700_a0-optimized.cl
@@ -298,7 +298,7 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -485,7 +485,7 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl
index 2041e3f5d..8f4c3e33a 100644
--- a/OpenCL/m08700_a1-optimized.cl
+++ b/OpenCL/m08700_a1-optimized.cl
@@ -296,7 +296,7 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -543,7 +543,7 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl
index 897bc16c5..bb05bf13f 100644
--- a/OpenCL/m08700_a3-optimized.cl
+++ b/OpenCL/m08700_a3-optimized.cl
@@ -575,7 +575,7 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -643,7 +643,7 @@ KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -711,7 +711,7 @@ KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -779,7 +779,7 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -847,7 +847,7 @@ KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -915,7 +915,7 @@ KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl
index c8e5381e6..b4ef1c066 100644
--- a/OpenCL/m08800-pure.cl
+++ b/OpenCL/m08800-pure.cl
@@ -263,7 +263,7 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl
index a0a3cd7d4..997b611bd 100644
--- a/OpenCL/m09100-pure.cl
+++ b/OpenCL/m09100-pure.cl
@@ -426,7 +426,7 @@ KERNEL_FQ void m09100_init (KERN_ATTR_TMPS (lotus8_tmp_t))
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m09400-pure.cl b/OpenCL/m09400-pure.cl
index 99747349f..a59d38007 100644
--- a/OpenCL/m09400-pure.cl
+++ b/OpenCL/m09400-pure.cl
@@ -165,7 +165,7 @@ KERNEL_FQ void m09400_comp (KERN_ATTR_TMPS_ESALT (office2007_tmp_t, office2007_t
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m09500-pure.cl b/OpenCL/m09500-pure.cl
index ddcbc5cb9..72bae3d63 100644
--- a/OpenCL/m09500-pure.cl
+++ b/OpenCL/m09500-pure.cl
@@ -163,7 +163,7 @@ KERNEL_FQ void m09500_comp (KERN_ATTR_TMPS_ESALT (office2010_tmp_t, office2010_t
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m09600-pure.cl b/OpenCL/m09600-pure.cl
index c4d0332ee..edbe62eba 100644
--- a/OpenCL/m09600-pure.cl
+++ b/OpenCL/m09600-pure.cl
@@ -209,7 +209,7 @@ KERNEL_FQ void m09600_comp (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index 290761db2..8093d2f7a 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -608,7 +608,7 @@ KERNEL_FQ void m10700_loop (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl
index 6b9dfc686..284a8ca49 100644
--- a/OpenCL/m10700-pure.cl
+++ b/OpenCL/m10700-pure.cl
@@ -1209,7 +1209,7 @@ KERNEL_FQ void m10700_loop (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl
index 7b2319f41..7fe39fc4f 100644
--- a/OpenCL/m11100_a0-optimized.cl
+++ b/OpenCL/m11100_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -386,7 +386,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl
index 821f39c07..8e3ca5378 100644
--- a/OpenCL/m11100_a0-pure.cl
+++ b/OpenCL/m11100_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -198,7 +198,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl
index d378ff025..bd89d18d9 100644
--- a/OpenCL/m11100_a1-optimized.cl
+++ b/OpenCL/m11100_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -444,7 +444,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl
index 92bdbac02..a870a76f8 100644
--- a/OpenCL/m11100_a1-pure.cl
+++ b/OpenCL/m11100_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -194,7 +194,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl
index 76f4dab57..190917c48 100644
--- a/OpenCL/m11100_a3-optimized.cl
+++ b/OpenCL/m11100_a3-optimized.cl
@@ -675,7 +675,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -745,7 +745,7 @@ KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -815,7 +815,7 @@ KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -885,7 +885,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -955,7 +955,7 @@ KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1025,7 +1025,7 @@ KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl
index a9e172740..9b1ef9e5c 100644
--- a/OpenCL/m11100_a3-pure.cl
+++ b/OpenCL/m11100_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -237,7 +237,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl
index cf0b06e7b..8cf78d701 100644
--- a/OpenCL/m11300-pure.cl
+++ b/OpenCL/m11300-pure.cl
@@ -246,7 +246,7 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl
index 28d917d9e..b57c7c20b 100644
--- a/OpenCL/m11400_a0-pure.cl
+++ b/OpenCL/m11400_a0-pure.cl
@@ -62,7 +62,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_RULES_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -160,7 +160,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_RULES_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl
index 92ac1c8fd..b77777ca5 100644
--- a/OpenCL/m11400_a1-pure.cl
+++ b/OpenCL/m11400_a1-pure.cl
@@ -60,7 +60,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -154,7 +154,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl
index a98a3f6ce..6b814d216 100644
--- a/OpenCL/m11400_a3-pure.cl
+++ b/OpenCL/m11400_a3-pure.cl
@@ -60,7 +60,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_VECTOR_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -180,7 +180,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_VECTOR_ESALT (sip_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11700_a0-optimized.cl b/OpenCL/m11700_a0-optimized.cl
index e42cc57ac..3dc636ebe 100644
--- a/OpenCL/m11700_a0-optimized.cl
+++ b/OpenCL/m11700_a0-optimized.cl
@@ -114,7 +114,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -272,7 +272,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11700_a0-pure.cl b/OpenCL/m11700_a0-pure.cl
index d651caa92..20461ae49 100644
--- a/OpenCL/m11700_a0-pure.cl
+++ b/OpenCL/m11700_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -118,7 +118,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11700_a1-optimized.cl b/OpenCL/m11700_a1-optimized.cl
index 982e53128..846c25d36 100644
--- a/OpenCL/m11700_a1-optimized.cl
+++ b/OpenCL/m11700_a1-optimized.cl
@@ -112,7 +112,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -328,7 +328,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11700_a1-pure.cl b/OpenCL/m11700_a1-pure.cl
index 4046115f1..8d37a83ed 100644
--- a/OpenCL/m11700_a1-pure.cl
+++ b/OpenCL/m11700_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -114,7 +114,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl
index 7a13ed762..45baeb97a 100644
--- a/OpenCL/m11700_a3-optimized.cl
+++ b/OpenCL/m11700_a3-optimized.cl
@@ -281,7 +281,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -345,7 +345,7 @@ KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -409,7 +409,7 @@ KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -473,7 +473,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -537,7 +537,7 @@ KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -601,7 +601,7 @@ KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11700_a3-pure.cl b/OpenCL/m11700_a3-pure.cl
index 46988ca46..6261703a8 100644
--- a/OpenCL/m11700_a3-pure.cl
+++ b/OpenCL/m11700_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -127,7 +127,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11750_a0-pure.cl b/OpenCL/m11750_a0-pure.cl
index 8f1a4f03e..6e4f071db 100644
--- a/OpenCL/m11750_a0-pure.cl
+++ b/OpenCL/m11750_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -127,7 +127,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11750_a1-pure.cl b/OpenCL/m11750_a1-pure.cl
index fef704bb5..98f023f6b 100644
--- a/OpenCL/m11750_a1-pure.cl
+++ b/OpenCL/m11750_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -150,7 +150,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11750_a3-pure.cl b/OpenCL/m11750_a3-pure.cl
index 389818a4f..d1e989c82 100644
--- a/OpenCL/m11750_a3-pure.cl
+++ b/OpenCL/m11750_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -136,7 +136,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11760_a0-pure.cl b/OpenCL/m11760_a0-pure.cl
index 23784cd05..787c21ac7 100644
--- a/OpenCL/m11760_a0-pure.cl
+++ b/OpenCL/m11760_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -129,7 +129,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11760_a1-pure.cl b/OpenCL/m11760_a1-pure.cl
index 1f2f7fa41..62bae3ee6 100644
--- a/OpenCL/m11760_a1-pure.cl
+++ b/OpenCL/m11760_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -152,7 +152,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11760_a3-pure.cl b/OpenCL/m11760_a3-pure.cl
index 7096f4616..0ec476ac7 100644
--- a/OpenCL/m11760_a3-pure.cl
+++ b/OpenCL/m11760_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -138,7 +138,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob256_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11800_a0-optimized.cl b/OpenCL/m11800_a0-optimized.cl
index 30b8ffc46..2c4518667 100644
--- a/OpenCL/m11800_a0-optimized.cl
+++ b/OpenCL/m11800_a0-optimized.cl
@@ -114,7 +114,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -272,7 +272,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11800_a0-pure.cl b/OpenCL/m11800_a0-pure.cl
index e21d9eae6..41315fc64 100644
--- a/OpenCL/m11800_a0-pure.cl
+++ b/OpenCL/m11800_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -118,7 +118,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11800_a1-optimized.cl b/OpenCL/m11800_a1-optimized.cl
index 34846963d..ba0843fa2 100644
--- a/OpenCL/m11800_a1-optimized.cl
+++ b/OpenCL/m11800_a1-optimized.cl
@@ -112,7 +112,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -328,7 +328,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11800_a1-pure.cl b/OpenCL/m11800_a1-pure.cl
index 52cb7bd02..8f05197ef 100644
--- a/OpenCL/m11800_a1-pure.cl
+++ b/OpenCL/m11800_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -114,7 +114,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl
index 32a2746ce..f9538252f 100644
--- a/OpenCL/m11800_a3-optimized.cl
+++ b/OpenCL/m11800_a3-optimized.cl
@@ -281,7 +281,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -345,7 +345,7 @@ KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -409,7 +409,7 @@ KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -473,7 +473,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -537,7 +537,7 @@ KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -601,7 +601,7 @@ KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m11800_a3-pure.cl b/OpenCL/m11800_a3-pure.cl
index 12d1899c7..963d004db 100644
--- a/OpenCL/m11800_a3-pure.cl
+++ b/OpenCL/m11800_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -127,7 +127,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11850_a0-pure.cl b/OpenCL/m11850_a0-pure.cl
index 703ad9ac9..db98c9529 100644
--- a/OpenCL/m11850_a0-pure.cl
+++ b/OpenCL/m11850_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -127,7 +127,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11850_a1-pure.cl b/OpenCL/m11850_a1-pure.cl
index 6f8c93e1b..e7eac3084 100644
--- a/OpenCL/m11850_a1-pure.cl
+++ b/OpenCL/m11850_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -150,7 +150,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11850_a3-pure.cl b/OpenCL/m11850_a3-pure.cl
index b273a6ef4..91e2da87f 100644
--- a/OpenCL/m11850_a3-pure.cl
+++ b/OpenCL/m11850_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -136,7 +136,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11860_a0-pure.cl b/OpenCL/m11860_a0-pure.cl
index 0f20573ea..318c87fdf 100644
--- a/OpenCL/m11860_a0-pure.cl
+++ b/OpenCL/m11860_a0-pure.cl
@@ -45,7 +45,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -129,7 +129,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_RULES ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11860_a1-pure.cl b/OpenCL/m11860_a1-pure.cl
index 272f8a61c..e7880e570 100644
--- a/OpenCL/m11860_a1-pure.cl
+++ b/OpenCL/m11860_a1-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -152,7 +152,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_BASIC ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m11860_a3-pure.cl b/OpenCL/m11860_a3-pure.cl
index 7c54204be..1d920283b 100644
--- a/OpenCL/m11860_a3-pure.cl
+++ b/OpenCL/m11860_a3-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -138,7 +138,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_VECTOR ())
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl
index d0116b0ac..e7bc2e27e 100644
--- a/OpenCL/m12400-pure.cl
+++ b/OpenCL/m12400-pure.cl
@@ -534,7 +534,7 @@ KERNEL_FQ void m12400_init (KERN_ATTR_TMPS (bsdicrypt_tmp_t))
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -670,7 +670,7 @@ KERNEL_FQ void m12400_loop (KERN_ATTR_TMPS (bsdicrypt_tmp_t))
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl
index 94d525cf8..99920ff71 100644
--- a/OpenCL/m12500-pure.cl
+++ b/OpenCL/m12500-pure.cl
@@ -318,7 +318,7 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl
index 6106316ec..60a9461cd 100644
--- a/OpenCL/m12600_a0-optimized.cl
+++ b/OpenCL/m12600_a0-optimized.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -393,7 +393,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl
index d2f8fb7b3..66ca371dc 100644
--- a/OpenCL/m12600_a0-pure.cl
+++ b/OpenCL/m12600_a0-pure.cl
@@ -53,7 +53,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -189,7 +189,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl
index 728bc49a5..ea5805154 100644
--- a/OpenCL/m12600_a1-optimized.cl
+++ b/OpenCL/m12600_a1-optimized.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -449,7 +449,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl
index 6518aaca0..3b2730f66 100644
--- a/OpenCL/m12600_a1-pure.cl
+++ b/OpenCL/m12600_a1-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -185,7 +185,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl
index e42a0754b..15ec957ac 100644
--- a/OpenCL/m12600_a3-optimized.cl
+++ b/OpenCL/m12600_a3-optimized.cl
@@ -648,7 +648,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -718,7 +718,7 @@ KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -788,7 +788,7 @@ KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -858,7 +858,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -928,7 +928,7 @@ KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -998,7 +998,7 @@ KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl
index d0e80b99f..bf9cfb7f0 100644
--- a/OpenCL/m12600_a3-pure.cl
+++ b/OpenCL/m12600_a3-pure.cl
@@ -51,7 +51,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -198,7 +198,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m12700-pure.cl b/OpenCL/m12700-pure.cl
index 3d2638062..4a0c72d11 100644
--- a/OpenCL/m12700-pure.cl
+++ b/OpenCL/m12700-pure.cl
@@ -276,7 +276,7 @@ KERNEL_FQ void m12700_comp (KERN_ATTR_TMPS (mywallet_tmp_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl
index 0c32d564f..52318e9ce 100644
--- a/OpenCL/m12800-pure.cl
+++ b/OpenCL/m12800-pure.cl
@@ -100,7 +100,7 @@ KERNEL_FQ void m12800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
                  | ((i1 < 10) ? '0' + i1 : 'A' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl
index 41f912b0b..0ade009bd 100644
--- a/OpenCL/m13200-pure.cl
+++ b/OpenCL/m13200-pure.cl
@@ -112,7 +112,7 @@ KERNEL_FQ void m13200_loop (KERN_ATTR_TMPS (axcrypt_tmp_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13400-pure.cl b/OpenCL/m13400-pure.cl
index d77ecb680..5504ac07c 100644
--- a/OpenCL/m13400-pure.cl
+++ b/OpenCL/m13400-pure.cl
@@ -190,7 +190,7 @@ KERNEL_FQ void m13400_loop (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -290,7 +290,7 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl
index d3f29166e..e46f64018 100644
--- a/OpenCL/m13711-pure.cl
+++ b/OpenCL/m13711-pure.cl
@@ -140,7 +140,7 @@ KERNEL_FQ void m13711_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -287,7 +287,7 @@ KERNEL_FQ void m13711_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -471,7 +471,7 @@ KERNEL_FQ void m13711_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl
index 67b09a3e8..09214eb6c 100644
--- a/OpenCL/m13712-pure.cl
+++ b/OpenCL/m13712-pure.cl
@@ -191,7 +191,7 @@ KERNEL_FQ void m13712_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -338,7 +338,7 @@ KERNEL_FQ void m13712_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -523,7 +523,7 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl
index d6ea2a0e6..18ec22e13 100644
--- a/OpenCL/m13713-pure.cl
+++ b/OpenCL/m13713-pure.cl
@@ -256,7 +256,7 @@ KERNEL_FQ void m13713_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -403,7 +403,7 @@ KERNEL_FQ void m13713_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -589,7 +589,7 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl
index 55f3df252..1a58a3feb 100644
--- a/OpenCL/m13721-pure.cl
+++ b/OpenCL/m13721-pure.cl
@@ -162,7 +162,7 @@ KERNEL_FQ void m13721_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -390,7 +390,7 @@ KERNEL_FQ void m13721_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -618,7 +618,7 @@ KERNEL_FQ void m13721_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl
index 2e944cb50..ebb5d377a 100644
--- a/OpenCL/m13722-pure.cl
+++ b/OpenCL/m13722-pure.cl
@@ -213,7 +213,7 @@ KERNEL_FQ void m13722_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -441,7 +441,7 @@ KERNEL_FQ void m13722_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -670,7 +670,7 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl
index c1dd74f80..ce6924637 100644
--- a/OpenCL/m13723-pure.cl
+++ b/OpenCL/m13723-pure.cl
@@ -278,7 +278,7 @@ KERNEL_FQ void m13723_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -506,7 +506,7 @@ KERNEL_FQ void m13723_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -736,7 +736,7 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl
index 24d9cd1de..33fcd4ad8 100644
--- a/OpenCL/m13731-pure.cl
+++ b/OpenCL/m13731-pure.cl
@@ -200,7 +200,7 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -232,7 +232,7 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -447,7 +447,7 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -495,7 +495,7 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -758,7 +758,7 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -806,7 +806,7 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl
index a5e7ee9b9..ac382dcb8 100644
--- a/OpenCL/m13732-pure.cl
+++ b/OpenCL/m13732-pure.cl
@@ -251,7 +251,7 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -283,7 +283,7 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -498,7 +498,7 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -546,7 +546,7 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -810,7 +810,7 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -858,7 +858,7 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl
index 38f8060e3..122fd306a 100644
--- a/OpenCL/m13733-pure.cl
+++ b/OpenCL/m13733-pure.cl
@@ -316,7 +316,7 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   /**
    * Whirlpool shared
@@ -348,7 +348,7 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -563,7 +563,7 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -611,7 +611,7 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -876,7 +876,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -924,7 +924,7 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_Cl[7][i] = Cl[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl
index 8ff4e0717..528b608b9 100644
--- a/OpenCL/m13751-pure.cl
+++ b/OpenCL/m13751-pure.cl
@@ -146,7 +146,7 @@ KERNEL_FQ void m13751_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -322,7 +322,7 @@ KERNEL_FQ void m13751_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -559,7 +559,7 @@ KERNEL_FQ void m13751_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl
index 135ead320..421eca8bb 100644
--- a/OpenCL/m13752-pure.cl
+++ b/OpenCL/m13752-pure.cl
@@ -197,7 +197,7 @@ KERNEL_FQ void m13752_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -373,7 +373,7 @@ KERNEL_FQ void m13752_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -582,7 +582,7 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl
index 28c56ee1b..ce0021da5 100644
--- a/OpenCL/m13753-pure.cl
+++ b/OpenCL/m13753-pure.cl
@@ -262,7 +262,7 @@ KERNEL_FQ void m13753_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -438,7 +438,7 @@ KERNEL_FQ void m13753_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -648,7 +648,7 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl
index b3b76052c..eeb767e63 100644
--- a/OpenCL/m13771-pure.cl
+++ b/OpenCL/m13771-pure.cl
@@ -188,7 +188,7 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #ifdef REAL_SHM
 
@@ -206,7 +206,7 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -420,7 +420,7 @@ KERNEL_FQ void m13771_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -651,7 +651,7 @@ KERNEL_FQ void m13771_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl
index eb7864d99..80882acd3 100644
--- a/OpenCL/m13772-pure.cl
+++ b/OpenCL/m13772-pure.cl
@@ -239,7 +239,7 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #ifdef REAL_SHM
 
@@ -257,7 +257,7 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -471,7 +471,7 @@ KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -703,7 +703,7 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl
index eea140305..beb700902 100644
--- a/OpenCL/m13773-pure.cl
+++ b/OpenCL/m13773-pure.cl
@@ -304,7 +304,7 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #ifdef REAL_SHM
 
@@ -322,7 +322,7 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -536,7 +536,7 @@ KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_sbob_sl64[7][i] = sbob512_sl64[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -769,7 +769,7 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl
index c12745182..f2be74613 100644
--- a/OpenCL/m13800_a0-optimized.cl
+++ b/OpenCL/m13800_a0-optimized.cl
@@ -441,7 +441,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_RULES_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -637,7 +637,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_RULES_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl
index 3a432d316..75777a4c2 100644
--- a/OpenCL/m13800_a1-optimized.cl
+++ b/OpenCL/m13800_a1-optimized.cl
@@ -439,7 +439,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -691,7 +691,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl
index 46e422df4..4a30aec4a 100644
--- a/OpenCL/m13800_a3-optimized.cl
+++ b/OpenCL/m13800_a3-optimized.cl
@@ -753,7 +753,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -810,7 +810,7 @@ KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -867,7 +867,7 @@ KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -924,7 +924,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -981,7 +981,7 @@ KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1038,7 +1038,7 @@ KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
     s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl
index 4dce764dc..72ce8024b 100644
--- a/OpenCL/m13900_a0-optimized.cl
+++ b/OpenCL/m13900_a0-optimized.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -279,7 +279,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl
index 81ac9fe94..dae69122d 100644
--- a/OpenCL/m13900_a0-pure.cl
+++ b/OpenCL/m13900_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -203,7 +203,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl
index 3290633d1..7a5214719 100644
--- a/OpenCL/m13900_a1-optimized.cl
+++ b/OpenCL/m13900_a1-optimized.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -335,7 +335,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl
index 92d5eb003..1b4c8c6ce 100644
--- a/OpenCL/m13900_a1-pure.cl
+++ b/OpenCL/m13900_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -199,7 +199,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl
index 10b4b93f7..e446de3db 100644
--- a/OpenCL/m13900_a3-optimized.cl
+++ b/OpenCL/m13900_a3-optimized.cl
@@ -439,7 +439,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -509,7 +509,7 @@ KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -579,7 +579,7 @@ KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -649,7 +649,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -719,7 +719,7 @@ KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -789,7 +789,7 @@ KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl
index b0e85397d..7f8b5246d 100644
--- a/OpenCL/m13900_a3-pure.cl
+++ b/OpenCL/m13900_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -216,7 +216,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl
index 2be784b5a..d99c0eda2 100644
--- a/OpenCL/m14000_a0-pure.cl
+++ b/OpenCL/m14000_a0-pure.cl
@@ -534,7 +534,7 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -639,7 +639,7 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl
index 38367ff6a..4b85567a8 100644
--- a/OpenCL/m14000_a1-pure.cl
+++ b/OpenCL/m14000_a1-pure.cl
@@ -524,7 +524,7 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -672,7 +672,7 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl
index 99ffdb703..1efcaee4a 100644
--- a/OpenCL/m14100_a0-pure.cl
+++ b/OpenCL/m14100_a0-pure.cl
@@ -578,7 +578,7 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -713,7 +713,7 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl
index aa861b19f..d1a9465c1 100644
--- a/OpenCL/m14100_a1-pure.cl
+++ b/OpenCL/m14100_a1-pure.cl
@@ -568,7 +568,7 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -751,7 +751,7 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl
index 1943413a8..4414be3e0 100644
--- a/OpenCL/m14100_a3-pure.cl
+++ b/OpenCL/m14100_a3-pure.cl
@@ -750,7 +750,7 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -824,7 +824,7 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl
index f6934b1d0..1cfb306b2 100644
--- a/OpenCL/m14400_a0-optimized.cl
+++ b/OpenCL/m14400_a0-optimized.cl
@@ -142,7 +142,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -413,7 +413,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl
index 9271e5ae0..fc7a9350a 100644
--- a/OpenCL/m14400_a0-pure.cl
+++ b/OpenCL/m14400_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -299,7 +299,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl
index a9259b61c..d35895338 100644
--- a/OpenCL/m14400_a1-optimized.cl
+++ b/OpenCL/m14400_a1-optimized.cl
@@ -142,7 +142,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -477,7 +477,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl
index 88bf46bad..c90002326 100644
--- a/OpenCL/m14400_a1-pure.cl
+++ b/OpenCL/m14400_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -295,7 +295,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl
index 07938f501..6222f1214 100644
--- a/OpenCL/m14400_a3-optimized.cl
+++ b/OpenCL/m14400_a3-optimized.cl
@@ -652,7 +652,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -722,7 +722,7 @@ KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -792,7 +792,7 @@ KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -862,7 +862,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -932,7 +932,7 @@ KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -1002,7 +1002,7 @@ KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl
index 7976d7bc5..4dd93ffaf 100644
--- a/OpenCL/m14400_a3-pure.cl
+++ b/OpenCL/m14400_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -320,7 +320,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl
index 3acddaf75..1785ceb3a 100644
--- a/OpenCL/m14611-pure.cl
+++ b/OpenCL/m14611-pure.cl
@@ -330,7 +330,7 @@ KERNEL_FQ void m14611_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl
index 058c6e8a2..11ea28861 100644
--- a/OpenCL/m14621-pure.cl
+++ b/OpenCL/m14621-pure.cl
@@ -369,7 +369,7 @@ KERNEL_FQ void m14621_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl
index 99da8e3fb..2570b5a44 100644
--- a/OpenCL/m14631-pure.cl
+++ b/OpenCL/m14631-pure.cl
@@ -425,7 +425,7 @@ KERNEL_FQ void m14631_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl
index 4007b8a10..3deef6114 100644
--- a/OpenCL/m14641-pure.cl
+++ b/OpenCL/m14641-pure.cl
@@ -330,7 +330,7 @@ KERNEL_FQ void m14641_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl
index fb3673d67..df90ba694 100644
--- a/OpenCL/m14700-pure.cl
+++ b/OpenCL/m14700-pure.cl
@@ -266,7 +266,7 @@ KERNEL_FQ void m14700_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl
index 578fe9716..bb5fb88fc 100644
--- a/OpenCL/m14800-pure.cl
+++ b/OpenCL/m14800-pure.cl
@@ -530,7 +530,7 @@ KERNEL_FQ void m14800_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl
index 36ca69d9c..5f015d28f 100644
--- a/OpenCL/m14900_a0-optimized.cl
+++ b/OpenCL/m14900_a0-optimized.cl
@@ -125,7 +125,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_RULES ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -216,7 +216,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_RULES ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl
index 42c7dd6c9..c6b740235 100644
--- a/OpenCL/m14900_a1-optimized.cl
+++ b/OpenCL/m14900_a1-optimized.cl
@@ -123,7 +123,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -278,7 +278,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl
index b8a7ce4b1..cefd7cb98 100644
--- a/OpenCL/m14900_a3-optimized.cl
+++ b/OpenCL/m14900_a3-optimized.cl
@@ -231,7 +231,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -305,7 +305,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ())
     s_ftable[i] = c_ftable[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl
index b1c8fb0b4..8d7bdf942 100644
--- a/OpenCL/m15300-pure.cl
+++ b/OpenCL/m15300-pure.cl
@@ -407,7 +407,7 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t))
     s_skb[7][i] = c_skb[7][i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl
index dbb59c80a..51fdfa0eb 100644
--- a/OpenCL/m15900-pure.cl
+++ b/OpenCL/m15900-pure.cl
@@ -539,7 +539,7 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl
index ffbc3a6e2..cf02c5325 100644
--- a/OpenCL/m16000_a0-pure.cl
+++ b/OpenCL/m16000_a0-pure.cl
@@ -538,7 +538,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_RULES ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -631,7 +631,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_RULES ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl
index c5af56fce..775be6c57 100644
--- a/OpenCL/m16000_a1-pure.cl
+++ b/OpenCL/m16000_a1-pure.cl
@@ -536,7 +536,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_BASIC ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -708,7 +708,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_BASIC ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl
index c7cb2311f..23979dded 100644
--- a/OpenCL/m16000_a3-pure.cl
+++ b/OpenCL/m16000_a3-pure.cl
@@ -536,7 +536,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_VECTOR ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -657,7 +657,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_VECTOR ())
     s_tripcode_salt[i] = c_tripcode_salt[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl
index ee1906bee..2b28662f7 100644
--- a/OpenCL/m16200-pure.cl
+++ b/OpenCL/m16200-pure.cl
@@ -307,7 +307,7 @@ KERNEL_FQ void m16200_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl
index 908adb69f..6a6c2a6a8 100644
--- a/OpenCL/m16300-pure.cl
+++ b/OpenCL/m16300-pure.cl
@@ -441,7 +441,7 @@ KERNEL_FQ void m16300_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl
index c8751feb5..4422d0e70 100644
--- a/OpenCL/m16600_a0-optimized.cl
+++ b/OpenCL/m16600_a0-optimized.cl
@@ -67,7 +67,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -452,7 +452,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl
index 358543031..5c33c9fbd 100644
--- a/OpenCL/m16600_a0-pure.cl
+++ b/OpenCL/m16600_a0-pure.cl
@@ -67,7 +67,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -258,7 +258,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl
index c9c8212b6..c5942f445 100644
--- a/OpenCL/m16600_a1-optimized.cl
+++ b/OpenCL/m16600_a1-optimized.cl
@@ -65,7 +65,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -508,7 +508,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl
index a0be77baa..535916cd1 100644
--- a/OpenCL/m16600_a1-pure.cl
+++ b/OpenCL/m16600_a1-pure.cl
@@ -65,7 +65,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -254,7 +254,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl
index 04a1a7138..94729cbf9 100644
--- a/OpenCL/m16600_a3-optimized.cl
+++ b/OpenCL/m16600_a3-optimized.cl
@@ -362,7 +362,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -457,7 +457,7 @@ KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -552,7 +552,7 @@ KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -647,7 +647,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -742,7 +742,7 @@ KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -837,7 +837,7 @@ KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl
index 7a68e1ff2..a8e3775d8 100644
--- a/OpenCL/m16600_a3-pure.cl
+++ b/OpenCL/m16600_a3-pure.cl
@@ -65,7 +65,7 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
@@ -267,7 +267,7 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m18300-pure.cl b/OpenCL/m18300-pure.cl
index 6ae8fba68..9e1189d75 100644
--- a/OpenCL/m18300-pure.cl
+++ b/OpenCL/m18300-pure.cl
@@ -307,7 +307,7 @@ KERNEL_FQ void m18300_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m18400-pure.cl b/OpenCL/m18400-pure.cl
index f7abaea8d..3a4a80c27 100644
--- a/OpenCL/m18400-pure.cl
+++ b/OpenCL/m18400-pure.cl
@@ -303,7 +303,7 @@ KERNEL_FQ void m18400_comp (KERN_ATTR_TMPS_ESALT (odf12_tmp_t, odf12_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m18500_a0-pure.cl b/OpenCL/m18500_a0-pure.cl
index b4cf2faf6..7a68132b8 100644
--- a/OpenCL/m18500_a0-pure.cl
+++ b/OpenCL/m18500_a0-pure.cl
@@ -49,7 +49,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -165,7 +165,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m18500_a1-pure.cl b/OpenCL/m18500_a1-pure.cl
index cc48a7134..504d2577e 100644
--- a/OpenCL/m18500_a1-pure.cl
+++ b/OpenCL/m18500_a1-pure.cl
@@ -49,7 +49,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -164,7 +164,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_RULES ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m18500_a3-pure.cl b/OpenCL/m18500_a3-pure.cl
index 46f8624f7..a34072250 100644
--- a/OpenCL/m18500_a3-pure.cl
+++ b/OpenCL/m18500_a3-pure.cl
@@ -47,7 +47,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -174,7 +174,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_VECTOR ())
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m18900-pure.cl b/OpenCL/m18900-pure.cl
index 342a203a5..980d0bea9 100644
--- a/OpenCL/m18900-pure.cl
+++ b/OpenCL/m18900-pure.cl
@@ -271,7 +271,7 @@ KERNEL_FQ void m18900_comp (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m19500_a0-pure.cl b/OpenCL/m19500_a0-pure.cl
index 9ec4dafb1..31f2b56db 100644
--- a/OpenCL/m19500_a0-pure.cl
+++ b/OpenCL/m19500_a0-pure.cl
@@ -62,7 +62,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_RULES_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -197,7 +197,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_RULES_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m19500_a1-pure.cl b/OpenCL/m19500_a1-pure.cl
index 9760a360a..00f6bc9e7 100644
--- a/OpenCL/m19500_a1-pure.cl
+++ b/OpenCL/m19500_a1-pure.cl
@@ -60,7 +60,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -191,7 +191,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m19500_a3-pure.cl b/OpenCL/m19500_a3-pure.cl
index 3900c5ff9..61dce2d47 100644
--- a/OpenCL/m19500_a3-pure.cl
+++ b/OpenCL/m19500_a3-pure.cl
@@ -60,7 +60,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
@@ -206,7 +206,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t))
                  | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   if (gid >= gid_max) return;
 
diff --git a/OpenCL/m19600-pure.cl b/OpenCL/m19600-pure.cl
index 265d7aee5..aa19771fe 100644
--- a/OpenCL/m19600-pure.cl
+++ b/OpenCL/m19600-pure.cl
@@ -323,7 +323,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t
     s_td4[i] = td4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m19700-pure.cl b/OpenCL/m19700-pure.cl
index df9b79e6b..efa5493ec 100644
--- a/OpenCL/m19700-pure.cl
+++ b/OpenCL/m19700-pure.cl
@@ -323,7 +323,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m19800-pure.cl b/OpenCL/m19800-pure.cl
index a25f22da8..5c46c3edf 100644
--- a/OpenCL/m19800-pure.cl
+++ b/OpenCL/m19800-pure.cl
@@ -323,7 +323,7 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m19900-pure.cl b/OpenCL/m19900-pure.cl
index 0d8cccc01..b5cfa8bfd 100644
--- a/OpenCL/m19900-pure.cl
+++ b/OpenCL/m19900-pure.cl
@@ -324,7 +324,7 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t))
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl
index 3fed10939..4733e9f91 100644
--- a/OpenCL/m20011-pure.cl
+++ b/OpenCL/m20011-pure.cl
@@ -360,7 +360,7 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl
index 2a75acd23..dae331981 100644
--- a/OpenCL/m20012-pure.cl
+++ b/OpenCL/m20012-pure.cl
@@ -360,7 +360,7 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 
diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl
index f7f1239a9..fcd2b815a 100644
--- a/OpenCL/m20013-pure.cl
+++ b/OpenCL/m20013-pure.cl
@@ -360,7 +360,7 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     s_te4[i] = te4[i];
   }
 
-  barrier (CLK_LOCAL_MEM_FENCE);
+  SYNC_THREADS ();
 
   #else
 

From 89119bf24ad731c1a12a10c31ac19104b9b7531e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 13:59:43 +0200
Subject: [PATCH 05/73] Add missing inc_platform.h include

---
 OpenCL/amp_a0.cl                | 1 +
 OpenCL/amp_a1.cl                | 1 +
 OpenCL/amp_a3.cl                | 1 +
 OpenCL/inc_cipher_aes.cl        | 1 +
 OpenCL/inc_cipher_camellia.cl   | 1 +
 OpenCL/inc_cipher_des.cl        | 1 +
 OpenCL/inc_cipher_kuznyechik.cl | 1 +
 OpenCL/inc_cipher_serpent.cl    | 1 +
 OpenCL/inc_cipher_twofish.cl    | 1 +
 OpenCL/inc_common.cl            | 1 +
 OpenCL/inc_hash_md4.cl          | 1 +
 OpenCL/inc_hash_md5.cl          | 1 +
 OpenCL/inc_hash_ripemd160.cl    | 1 +
 OpenCL/inc_hash_sha1.cl         | 1 +
 OpenCL/inc_hash_sha224.cl       | 1 +
 OpenCL/inc_hash_sha256.cl       | 1 +
 OpenCL/inc_hash_sha384.cl       | 1 +
 OpenCL/inc_hash_sha512.cl       | 1 +
 OpenCL/inc_hash_streebog256.cl  | 1 +
 OpenCL/inc_hash_streebog512.cl  | 1 +
 OpenCL/inc_hash_whirlpool.cl    | 1 +
 OpenCL/inc_luks_aes.cl          | 1 +
 OpenCL/inc_luks_af.cl           | 1 +
 OpenCL/inc_luks_essiv.cl        | 1 +
 OpenCL/inc_luks_serpent.cl      | 1 +
 OpenCL/inc_luks_twofish.cl      | 1 +
 OpenCL/inc_luks_xts.cl          | 1 +
 OpenCL/inc_platform.cl          | 1 +
 OpenCL/inc_rp.cl                | 1 +
 OpenCL/inc_rp_optimized.cl      | 1 +
 OpenCL/inc_simd.cl              | 1 +
 OpenCL/inc_truecrypt_crc32.cl   | 1 +
 OpenCL/inc_truecrypt_keyfile.cl | 1 +
 OpenCL/inc_truecrypt_xts.cl     | 1 +
 OpenCL/inc_veracrypt_xts.cl     | 1 +
 OpenCL/m00000_a0-optimized.cl   | 1 +
 OpenCL/m00000_a0-pure.cl        | 1 +
 OpenCL/m00000_a1-optimized.cl   | 1 +
 OpenCL/m00000_a1-pure.cl        | 1 +
 OpenCL/m00000_a3-optimized.cl   | 4 ++--
 OpenCL/m00000_a3-pure.cl        | 1 +
 OpenCL/m00010_a0-optimized.cl   | 1 +
 OpenCL/m00010_a0-pure.cl        | 1 +
 OpenCL/m00010_a1-optimized.cl   | 1 +
 OpenCL/m00010_a1-pure.cl        | 1 +
 OpenCL/m00010_a3-optimized.cl   | 1 +
 OpenCL/m00010_a3-pure.cl        | 1 +
 OpenCL/m00020_a0-optimized.cl   | 1 +
 OpenCL/m00020_a0-pure.cl        | 1 +
 OpenCL/m00020_a1-optimized.cl   | 1 +
 OpenCL/m00020_a1-pure.cl        | 1 +
 OpenCL/m00020_a3-optimized.cl   | 1 +
 OpenCL/m00020_a3-pure.cl        | 1 +
 OpenCL/m00030_a0-optimized.cl   | 1 +
 OpenCL/m00030_a0-pure.cl        | 1 +
 OpenCL/m00030_a1-optimized.cl   | 1 +
 OpenCL/m00030_a1-pure.cl        | 1 +
 OpenCL/m00030_a3-optimized.cl   | 1 +
 OpenCL/m00030_a3-pure.cl        | 1 +
 OpenCL/m00040_a0-optimized.cl   | 1 +
 OpenCL/m00040_a0-pure.cl        | 1 +
 OpenCL/m00040_a1-optimized.cl   | 1 +
 OpenCL/m00040_a1-pure.cl        | 1 +
 OpenCL/m00040_a3-optimized.cl   | 1 +
 OpenCL/m00040_a3-pure.cl        | 1 +
 OpenCL/m00050_a0-optimized.cl   | 1 +
 OpenCL/m00050_a0-pure.cl        | 1 +
 OpenCL/m00050_a1-optimized.cl   | 1 +
 OpenCL/m00050_a1-pure.cl        | 1 +
 OpenCL/m00050_a3-optimized.cl   | 1 +
 OpenCL/m00050_a3-pure.cl        | 1 +
 OpenCL/m00060_a0-optimized.cl   | 1 +
 OpenCL/m00060_a0-pure.cl        | 1 +
 OpenCL/m00060_a1-optimized.cl   | 1 +
 OpenCL/m00060_a1-pure.cl        | 1 +
 OpenCL/m00060_a3-optimized.cl   | 1 +
 OpenCL/m00060_a3-pure.cl        | 1 +
 OpenCL/m00100_a0-optimized.cl   | 1 +
 OpenCL/m00100_a0-pure.cl        | 1 +
 OpenCL/m00100_a1-optimized.cl   | 1 +
 OpenCL/m00100_a1-pure.cl        | 1 +
 OpenCL/m00100_a3-optimized.cl   | 1 +
 OpenCL/m00100_a3-pure.cl        | 1 +
 OpenCL/m00110_a0-optimized.cl   | 1 +
 OpenCL/m00110_a0-pure.cl        | 1 +
 OpenCL/m00110_a1-optimized.cl   | 1 +
 OpenCL/m00110_a1-pure.cl        | 1 +
 OpenCL/m00110_a3-optimized.cl   | 1 +
 OpenCL/m00110_a3-pure.cl        | 1 +
 OpenCL/m00120_a0-optimized.cl   | 1 +
 OpenCL/m00120_a0-pure.cl        | 1 +
 OpenCL/m00120_a1-optimized.cl   | 1 +
 OpenCL/m00120_a1-pure.cl        | 1 +
 OpenCL/m00120_a3-optimized.cl   | 1 +
 OpenCL/m00120_a3-pure.cl        | 1 +
 OpenCL/m00130_a0-optimized.cl   | 1 +
 OpenCL/m00130_a0-pure.cl        | 1 +
 OpenCL/m00130_a1-optimized.cl   | 1 +
 OpenCL/m00130_a1-pure.cl        | 1 +
 OpenCL/m00130_a3-optimized.cl   | 1 +
 OpenCL/m00130_a3-pure.cl        | 1 +
 OpenCL/m00140_a0-optimized.cl   | 1 +
 OpenCL/m00140_a0-pure.cl        | 1 +
 OpenCL/m00140_a1-optimized.cl   | 1 +
 OpenCL/m00140_a1-pure.cl        | 1 +
 OpenCL/m00140_a3-optimized.cl   | 1 +
 OpenCL/m00140_a3-pure.cl        | 1 +
 OpenCL/m00150_a0-optimized.cl   | 1 +
 OpenCL/m00150_a0-pure.cl        | 1 +
 OpenCL/m00150_a1-optimized.cl   | 1 +
 OpenCL/m00150_a1-pure.cl        | 1 +
 OpenCL/m00150_a3-optimized.cl   | 1 +
 OpenCL/m00150_a3-pure.cl        | 1 +
 OpenCL/m00160_a0-optimized.cl   | 1 +
 OpenCL/m00160_a0-pure.cl        | 1 +
 OpenCL/m00160_a1-optimized.cl   | 1 +
 OpenCL/m00160_a1-pure.cl        | 1 +
 OpenCL/m00160_a3-optimized.cl   | 1 +
 OpenCL/m00160_a3-pure.cl        | 1 +
 OpenCL/m00200_a0-optimized.cl   | 1 +
 OpenCL/m00200_a1-optimized.cl   | 1 +
 OpenCL/m00200_a3-optimized.cl   | 1 +
 OpenCL/m00300_a0-optimized.cl   | 1 +
 OpenCL/m00300_a0-pure.cl        | 1 +
 OpenCL/m00300_a1-optimized.cl   | 1 +
 OpenCL/m00300_a1-pure.cl        | 1 +
 OpenCL/m00300_a3-optimized.cl   | 1 +
 OpenCL/m00300_a3-pure.cl        | 1 +
 OpenCL/m00400-optimized.cl      | 1 +
 OpenCL/m00400-pure.cl           | 1 +
 OpenCL/m00500-optimized.cl      | 1 +
 OpenCL/m00500-pure.cl           | 1 +
 OpenCL/m00600_a0-optimized.cl   | 1 +
 OpenCL/m00600_a1-optimized.cl   | 1 +
 OpenCL/m00600_a3-optimized.cl   | 1 +
 OpenCL/m00900_a0-optimized.cl   | 1 +
 OpenCL/m00900_a0-pure.cl        | 1 +
 OpenCL/m00900_a1-optimized.cl   | 1 +
 OpenCL/m00900_a1-pure.cl        | 1 +
 OpenCL/m00900_a3-optimized.cl   | 1 +
 OpenCL/m00900_a3-pure.cl        | 1 +
 OpenCL/m01000_a0-optimized.cl   | 1 +
 OpenCL/m01000_a0-pure.cl        | 1 +
 OpenCL/m01000_a1-optimized.cl   | 1 +
 OpenCL/m01000_a1-pure.cl        | 1 +
 OpenCL/m01000_a3-optimized.cl   | 1 +
 OpenCL/m01000_a3-pure.cl        | 1 +
 OpenCL/m01100_a0-optimized.cl   | 1 +
 OpenCL/m01100_a0-pure.cl        | 1 +
 OpenCL/m01100_a1-optimized.cl   | 1 +
 OpenCL/m01100_a1-pure.cl        | 1 +
 OpenCL/m01100_a3-optimized.cl   | 1 +
 OpenCL/m01100_a3-pure.cl        | 1 +
 OpenCL/m01300_a0-optimized.cl   | 1 +
 OpenCL/m01300_a0-pure.cl        | 1 +
 OpenCL/m01300_a1-optimized.cl   | 1 +
 OpenCL/m01300_a1-pure.cl        | 1 +
 OpenCL/m01300_a3-optimized.cl   | 1 +
 OpenCL/m01300_a3-pure.cl        | 1 +
 OpenCL/m01400_a0-optimized.cl   | 1 +
 OpenCL/m01400_a0-pure.cl        | 1 +
 OpenCL/m01400_a1-optimized.cl   | 1 +
 OpenCL/m01400_a1-pure.cl        | 1 +
 OpenCL/m01400_a3-optimized.cl   | 1 +
 OpenCL/m01400_a3-pure.cl        | 1 +
 OpenCL/m01410_a0-optimized.cl   | 1 +
 OpenCL/m01410_a0-pure.cl        | 1 +
 OpenCL/m01410_a1-optimized.cl   | 1 +
 OpenCL/m01410_a1-pure.cl        | 1 +
 OpenCL/m01410_a3-optimized.cl   | 1 +
 OpenCL/m01410_a3-pure.cl        | 1 +
 OpenCL/m01420_a0-optimized.cl   | 1 +
 OpenCL/m01420_a0-pure.cl        | 1 +
 OpenCL/m01420_a1-optimized.cl   | 1 +
 OpenCL/m01420_a1-pure.cl        | 1 +
 OpenCL/m01420_a3-optimized.cl   | 1 +
 OpenCL/m01420_a3-pure.cl        | 1 +
 OpenCL/m01430_a0-optimized.cl   | 1 +
 OpenCL/m01430_a0-pure.cl        | 1 +
 OpenCL/m01430_a1-optimized.cl   | 1 +
 OpenCL/m01430_a1-pure.cl        | 1 +
 OpenCL/m01430_a3-optimized.cl   | 1 +
 OpenCL/m01430_a3-pure.cl        | 1 +
 OpenCL/m01440_a0-optimized.cl   | 1 +
 OpenCL/m01440_a0-pure.cl        | 1 +
 OpenCL/m01440_a1-optimized.cl   | 1 +
 OpenCL/m01440_a1-pure.cl        | 1 +
 OpenCL/m01440_a3-optimized.cl   | 1 +
 OpenCL/m01440_a3-pure.cl        | 1 +
 OpenCL/m01450_a0-optimized.cl   | 1 +
 OpenCL/m01450_a0-pure.cl        | 1 +
 OpenCL/m01450_a1-optimized.cl   | 1 +
 OpenCL/m01450_a1-pure.cl        | 1 +
 OpenCL/m01450_a3-optimized.cl   | 1 +
 OpenCL/m01450_a3-pure.cl        | 1 +
 OpenCL/m01460_a0-optimized.cl   | 1 +
 OpenCL/m01460_a0-pure.cl        | 1 +
 OpenCL/m01460_a1-optimized.cl   | 1 +
 OpenCL/m01460_a1-pure.cl        | 1 +
 OpenCL/m01460_a3-optimized.cl   | 1 +
 OpenCL/m01460_a3-pure.cl        | 1 +
 OpenCL/m01500_a0-pure.cl        | 1 +
 OpenCL/m01500_a1-pure.cl        | 1 +
 OpenCL/m01500_a3-pure.cl        | 1 +
 OpenCL/m01600-optimized.cl      | 1 +
 OpenCL/m01600-pure.cl           | 1 +
 OpenCL/m01700_a0-optimized.cl   | 1 +
 OpenCL/m01700_a0-pure.cl        | 1 +
 OpenCL/m01700_a1-optimized.cl   | 1 +
 OpenCL/m01700_a1-pure.cl        | 1 +
 OpenCL/m01700_a3-optimized.cl   | 1 +
 OpenCL/m01700_a3-pure.cl        | 1 +
 OpenCL/m01710_a0-optimized.cl   | 1 +
 OpenCL/m01710_a0-pure.cl        | 1 +
 OpenCL/m01710_a1-optimized.cl   | 1 +
 OpenCL/m01710_a1-pure.cl        | 1 +
 OpenCL/m01710_a3-optimized.cl   | 1 +
 OpenCL/m01710_a3-pure.cl        | 1 +
 OpenCL/m01720_a0-optimized.cl   | 1 +
 OpenCL/m01720_a0-pure.cl        | 1 +
 OpenCL/m01720_a1-optimized.cl   | 1 +
 OpenCL/m01720_a1-pure.cl        | 1 +
 OpenCL/m01720_a3-optimized.cl   | 1 +
 OpenCL/m01720_a3-pure.cl        | 1 +
 OpenCL/m01730_a0-optimized.cl   | 1 +
 OpenCL/m01730_a0-pure.cl        | 1 +
 OpenCL/m01730_a1-optimized.cl   | 1 +
 OpenCL/m01730_a1-pure.cl        | 1 +
 OpenCL/m01730_a3-optimized.cl   | 1 +
 OpenCL/m01730_a3-pure.cl        | 1 +
 OpenCL/m01740_a0-optimized.cl   | 1 +
 OpenCL/m01740_a0-pure.cl        | 1 +
 OpenCL/m01740_a1-optimized.cl   | 1 +
 OpenCL/m01740_a1-pure.cl        | 1 +
 OpenCL/m01740_a3-optimized.cl   | 1 +
 OpenCL/m01740_a3-pure.cl        | 1 +
 OpenCL/m01750_a0-optimized.cl   | 1 +
 OpenCL/m01750_a0-pure.cl        | 1 +
 OpenCL/m01750_a1-optimized.cl   | 1 +
 OpenCL/m01750_a1-pure.cl        | 1 +
 OpenCL/m01750_a3-optimized.cl   | 1 +
 OpenCL/m01750_a3-pure.cl        | 1 +
 OpenCL/m01760_a0-optimized.cl   | 1 +
 OpenCL/m01760_a0-pure.cl        | 1 +
 OpenCL/m01760_a1-optimized.cl   | 1 +
 OpenCL/m01760_a1-pure.cl        | 1 +
 OpenCL/m01760_a3-optimized.cl   | 1 +
 OpenCL/m01760_a3-pure.cl        | 1 +
 OpenCL/m01800-optimized.cl      | 1 +
 OpenCL/m01800-pure.cl           | 1 +
 OpenCL/m02000_a0-pure.cl        | 1 +
 OpenCL/m02000_a1-pure.cl        | 1 +
 OpenCL/m02000_a3-pure.cl        | 1 +
 OpenCL/m02100-pure.cl           | 1 +
 OpenCL/m02400_a0-optimized.cl   | 1 +
 OpenCL/m02400_a1-optimized.cl   | 1 +
 OpenCL/m02400_a3-optimized.cl   | 1 +
 OpenCL/m02410_a0-optimized.cl   | 1 +
 OpenCL/m02410_a1-optimized.cl   | 1 +
 OpenCL/m02410_a3-optimized.cl   | 1 +
 OpenCL/m02500-pure.cl           | 2 ++
 OpenCL/m02501-pure.cl           | 2 ++
 OpenCL/m02610_a0-optimized.cl   | 1 +
 OpenCL/m02610_a0-pure.cl        | 1 +
 OpenCL/m02610_a1-optimized.cl   | 1 +
 OpenCL/m02610_a1-pure.cl        | 1 +
 OpenCL/m02610_a3-optimized.cl   | 1 +
 OpenCL/m02610_a3-pure.cl        | 1 +
 OpenCL/m02710_a0-optimized.cl   | 1 +
 OpenCL/m02710_a1-optimized.cl   | 1 +
 OpenCL/m02710_a3-optimized.cl   | 1 +
 OpenCL/m02810_a0-optimized.cl   | 1 +
 OpenCL/m02810_a0-pure.cl        | 1 +
 OpenCL/m02810_a1-optimized.cl   | 1 +
 OpenCL/m02810_a1-pure.cl        | 1 +
 OpenCL/m02810_a3-optimized.cl   | 1 +
 OpenCL/m02810_a3-pure.cl        | 1 +
 OpenCL/m03000_a0-pure.cl        | 1 +
 OpenCL/m03000_a1-pure.cl        | 1 +
 OpenCL/m03000_a3-pure.cl        | 1 +
 OpenCL/m03100_a0-optimized.cl   | 1 +
 OpenCL/m03100_a1-optimized.cl   | 1 +
 OpenCL/m03100_a3-optimized.cl   | 1 +
 OpenCL/m03200-pure.cl           | 1 +
 OpenCL/m03710_a0-optimized.cl   | 1 +
 OpenCL/m03710_a0-pure.cl        | 1 +
 OpenCL/m03710_a1-optimized.cl   | 1 +
 OpenCL/m03710_a1-pure.cl        | 1 +
 OpenCL/m03710_a3-optimized.cl   | 1 +
 OpenCL/m03710_a3-pure.cl        | 1 +
 OpenCL/m03800_a0-optimized.cl   | 1 +
 OpenCL/m03800_a0-pure.cl        | 1 +
 OpenCL/m03800_a1-optimized.cl   | 1 +
 OpenCL/m03800_a1-pure.cl        | 1 +
 OpenCL/m03800_a3-optimized.cl   | 1 +
 OpenCL/m03800_a3-pure.cl        | 1 +
 OpenCL/m03910_a0-optimized.cl   | 1 +
 OpenCL/m03910_a0-pure.cl        | 1 +
 OpenCL/m03910_a1-optimized.cl   | 1 +
 OpenCL/m03910_a1-pure.cl        | 1 +
 OpenCL/m03910_a3-optimized.cl   | 1 +
 OpenCL/m03910_a3-pure.cl        | 1 +
 OpenCL/m04010_a0-optimized.cl   | 1 +
 OpenCL/m04010_a0-pure.cl        | 1 +
 OpenCL/m04010_a1-optimized.cl   | 1 +
 OpenCL/m04010_a1-pure.cl        | 1 +
 OpenCL/m04010_a3-optimized.cl   | 1 +
 OpenCL/m04010_a3-pure.cl        | 1 +
 OpenCL/m04110_a0-optimized.cl   | 1 +
 OpenCL/m04110_a0-pure.cl        | 1 +
 OpenCL/m04110_a1-optimized.cl   | 1 +
 OpenCL/m04110_a1-pure.cl        | 1 +
 OpenCL/m04110_a3-optimized.cl   | 1 +
 OpenCL/m04110_a3-pure.cl        | 1 +
 OpenCL/m04310_a0-optimized.cl   | 1 +
 OpenCL/m04310_a0-pure.cl        | 1 +
 OpenCL/m04310_a1-optimized.cl   | 1 +
 OpenCL/m04310_a1-pure.cl        | 1 +
 OpenCL/m04310_a3-optimized.cl   | 1 +
 OpenCL/m04310_a3-pure.cl        | 1 +
 OpenCL/m04400_a0-optimized.cl   | 1 +
 OpenCL/m04400_a0-pure.cl        | 1 +
 OpenCL/m04400_a1-optimized.cl   | 1 +
 OpenCL/m04400_a1-pure.cl        | 1 +
 OpenCL/m04400_a3-optimized.cl   | 1 +
 OpenCL/m04400_a3-pure.cl        | 1 +
 OpenCL/m04500_a0-optimized.cl   | 1 +
 OpenCL/m04500_a0-pure.cl        | 1 +
 OpenCL/m04500_a1-optimized.cl   | 1 +
 OpenCL/m04500_a1-pure.cl        | 1 +
 OpenCL/m04500_a3-optimized.cl   | 1 +
 OpenCL/m04500_a3-pure.cl        | 1 +
 OpenCL/m04520_a0-optimized.cl   | 1 +
 OpenCL/m04520_a0-pure.cl        | 1 +
 OpenCL/m04520_a1-optimized.cl   | 1 +
 OpenCL/m04520_a1-pure.cl        | 1 +
 OpenCL/m04520_a3-optimized.cl   | 1 +
 OpenCL/m04520_a3-pure.cl        | 1 +
 OpenCL/m04700_a0-optimized.cl   | 1 +
 OpenCL/m04700_a0-pure.cl        | 1 +
 OpenCL/m04700_a1-optimized.cl   | 1 +
 OpenCL/m04700_a1-pure.cl        | 1 +
 OpenCL/m04700_a3-optimized.cl   | 1 +
 OpenCL/m04700_a3-pure.cl        | 1 +
 OpenCL/m04800_a0-optimized.cl   | 1 +
 OpenCL/m04800_a0-pure.cl        | 1 +
 OpenCL/m04800_a1-optimized.cl   | 1 +
 OpenCL/m04800_a1-pure.cl        | 1 +
 OpenCL/m04800_a3-optimized.cl   | 1 +
 OpenCL/m04800_a3-pure.cl        | 1 +
 OpenCL/m04900_a0-optimized.cl   | 1 +
 OpenCL/m04900_a0-pure.cl        | 1 +
 OpenCL/m04900_a1-optimized.cl   | 1 +
 OpenCL/m04900_a1-pure.cl        | 1 +
 OpenCL/m04900_a3-optimized.cl   | 1 +
 OpenCL/m04900_a3-pure.cl        | 1 +
 OpenCL/m05100_a0-optimized.cl   | 1 +
 OpenCL/m05100_a0-pure.cl        | 1 +
 OpenCL/m05100_a1-optimized.cl   | 1 +
 OpenCL/m05100_a1-pure.cl        | 1 +
 OpenCL/m05100_a3-optimized.cl   | 1 +
 OpenCL/m05100_a3-pure.cl        | 1 +
 OpenCL/m05200-pure.cl           | 1 +
 OpenCL/m05300_a0-optimized.cl   | 1 +
 OpenCL/m05300_a0-pure.cl        | 1 +
 OpenCL/m05300_a1-optimized.cl   | 1 +
 OpenCL/m05300_a1-pure.cl        | 1 +
 OpenCL/m05300_a3-optimized.cl   | 1 +
 OpenCL/m05300_a3-pure.cl        | 1 +
 OpenCL/m05400_a0-optimized.cl   | 1 +
 OpenCL/m05400_a0-pure.cl        | 1 +
 OpenCL/m05400_a1-optimized.cl   | 1 +
 OpenCL/m05400_a1-pure.cl        | 1 +
 OpenCL/m05400_a3-optimized.cl   | 1 +
 OpenCL/m05400_a3-pure.cl        | 1 +
 OpenCL/m05500_a0-optimized.cl   | 1 +
 OpenCL/m05500_a0-pure.cl        | 1 +
 OpenCL/m05500_a1-optimized.cl   | 1 +
 OpenCL/m05500_a1-pure.cl        | 1 +
 OpenCL/m05500_a3-optimized.cl   | 1 +
 OpenCL/m05500_a3-pure.cl        | 1 +
 OpenCL/m05600_a0-optimized.cl   | 1 +
 OpenCL/m05600_a0-pure.cl        | 1 +
 OpenCL/m05600_a1-optimized.cl   | 1 +
 OpenCL/m05600_a1-pure.cl        | 1 +
 OpenCL/m05600_a3-optimized.cl   | 1 +
 OpenCL/m05600_a3-pure.cl        | 1 +
 OpenCL/m05800-optimized.cl      | 1 +
 OpenCL/m05800-pure.cl           | 1 +
 OpenCL/m06000_a0-optimized.cl   | 1 +
 OpenCL/m06000_a0-pure.cl        | 1 +
 OpenCL/m06000_a1-optimized.cl   | 1 +
 OpenCL/m06000_a1-pure.cl        | 1 +
 OpenCL/m06000_a3-optimized.cl   | 1 +
 OpenCL/m06000_a3-pure.cl        | 1 +
 OpenCL/m06100_a0-optimized.cl   | 1 +
 OpenCL/m06100_a0-pure.cl        | 1 +
 OpenCL/m06100_a1-optimized.cl   | 1 +
 OpenCL/m06100_a1-pure.cl        | 1 +
 OpenCL/m06100_a3-optimized.cl   | 1 +
 OpenCL/m06100_a3-pure.cl        | 1 +
 OpenCL/m06211-pure.cl           | 1 +
 OpenCL/m06212-pure.cl           | 1 +
 OpenCL/m06213-pure.cl           | 1 +
 OpenCL/m06221-pure.cl           | 1 +
 OpenCL/m06222-pure.cl           | 1 +
 OpenCL/m06223-pure.cl           | 1 +
 OpenCL/m06231-pure.cl           | 1 +
 OpenCL/m06232-pure.cl           | 1 +
 OpenCL/m06233-pure.cl           | 1 +
 OpenCL/m06300-optimized.cl      | 1 +
 OpenCL/m06300-pure.cl           | 1 +
 OpenCL/m06400-pure.cl           | 1 +
 OpenCL/m06500-pure.cl           | 1 +
 OpenCL/m06600-pure.cl           | 1 +
 OpenCL/m06700-pure.cl           | 1 +
 OpenCL/m06800-pure.cl           | 1 +
 OpenCL/m06900_a0-optimized.cl   | 1 +
 OpenCL/m06900_a1-optimized.cl   | 1 +
 OpenCL/m06900_a3-optimized.cl   | 1 +
 OpenCL/m07000_a0-optimized.cl   | 1 +
 OpenCL/m07000_a0-pure.cl        | 1 +
 OpenCL/m07000_a1-optimized.cl   | 1 +
 OpenCL/m07000_a1-pure.cl        | 1 +
 OpenCL/m07000_a3-optimized.cl   | 1 +
 OpenCL/m07000_a3-pure.cl        | 1 +
 OpenCL/m07100-pure.cl           | 1 +
 OpenCL/m07300_a0-optimized.cl   | 1 +
 OpenCL/m07300_a0-pure.cl        | 1 +
 OpenCL/m07300_a1-optimized.cl   | 1 +
 OpenCL/m07300_a1-pure.cl        | 1 +
 OpenCL/m07300_a3-optimized.cl   | 1 +
 OpenCL/m07300_a3-pure.cl        | 1 +
 OpenCL/m07400-optimized.cl      | 1 +
 OpenCL/m07400-pure.cl           | 1 +
 OpenCL/m07500_a0-optimized.cl   | 1 +
 OpenCL/m07500_a0-pure.cl        | 1 +
 OpenCL/m07500_a1-optimized.cl   | 1 +
 OpenCL/m07500_a1-pure.cl        | 1 +
 OpenCL/m07500_a3-optimized.cl   | 1 +
 OpenCL/m07500_a3-pure.cl        | 1 +
 OpenCL/m07700_a0-optimized.cl   | 1 +
 OpenCL/m07700_a1-optimized.cl   | 1 +
 OpenCL/m07700_a3-optimized.cl   | 1 +
 OpenCL/m07701_a0-optimized.cl   | 1 +
 OpenCL/m07701_a1-optimized.cl   | 1 +
 OpenCL/m07701_a3-optimized.cl   | 1 +
 OpenCL/m07800_a0-optimized.cl   | 1 +
 OpenCL/m07800_a1-optimized.cl   | 1 +
 OpenCL/m07800_a3-optimized.cl   | 1 +
 OpenCL/m07801_a0-optimized.cl   | 1 +
 OpenCL/m07801_a1-optimized.cl   | 1 +
 OpenCL/m07801_a3-optimized.cl   | 1 +
 OpenCL/m07900-pure.cl           | 1 +
 OpenCL/m08000_a0-optimized.cl   | 1 +
 OpenCL/m08000_a1-optimized.cl   | 1 +
 OpenCL/m08000_a3-optimized.cl   | 1 +
 OpenCL/m08100_a0-optimized.cl   | 1 +
 OpenCL/m08100_a0-pure.cl        | 1 +
 OpenCL/m08100_a1-optimized.cl   | 1 +
 OpenCL/m08100_a1-pure.cl        | 1 +
 OpenCL/m08100_a3-optimized.cl   | 1 +
 OpenCL/m08100_a3-pure.cl        | 1 +
 OpenCL/m08200-pure.cl           | 1 +
 OpenCL/m08300_a0-optimized.cl   | 1 +
 OpenCL/m08300_a0-pure.cl        | 1 +
 OpenCL/m08300_a1-optimized.cl   | 1 +
 OpenCL/m08300_a1-pure.cl        | 1 +
 OpenCL/m08300_a3-optimized.cl   | 1 +
 OpenCL/m08300_a3-pure.cl        | 1 +
 OpenCL/m08400_a0-optimized.cl   | 1 +
 OpenCL/m08400_a0-pure.cl        | 1 +
 OpenCL/m08400_a1-optimized.cl   | 1 +
 OpenCL/m08400_a1-pure.cl        | 1 +
 OpenCL/m08400_a3-optimized.cl   | 1 +
 OpenCL/m08400_a3-pure.cl        | 1 +
 OpenCL/m08500_a0-pure.cl        | 1 +
 OpenCL/m08500_a1-pure.cl        | 1 +
 OpenCL/m08500_a3-pure.cl        | 1 +
 OpenCL/m08600_a0-pure.cl        | 1 +
 OpenCL/m08600_a1-pure.cl        | 1 +
 OpenCL/m08600_a3-pure.cl        | 1 +
 OpenCL/m08700_a0-optimized.cl   | 1 +
 OpenCL/m08700_a1-optimized.cl   | 1 +
 OpenCL/m08700_a3-optimized.cl   | 1 +
 OpenCL/m08800-pure.cl           | 1 +
 OpenCL/m08900-pure.cl           | 1 +
 OpenCL/m09000-pure.cl           | 1 +
 OpenCL/m09100-pure.cl           | 1 +
 OpenCL/m09400-pure.cl           | 1 +
 OpenCL/m09500-pure.cl           | 1 +
 OpenCL/m09600-pure.cl           | 1 +
 OpenCL/m09700_a0-optimized.cl   | 1 +
 OpenCL/m09700_a1-optimized.cl   | 1 +
 OpenCL/m09700_a3-optimized.cl   | 1 +
 OpenCL/m09710_a0-optimized.cl   | 1 +
 OpenCL/m09710_a1-optimized.cl   | 1 +
 OpenCL/m09710_a3-optimized.cl   | 1 +
 OpenCL/m09720_a0-optimized.cl   | 1 +
 OpenCL/m09720_a1-optimized.cl   | 1 +
 OpenCL/m09720_a3-optimized.cl   | 1 +
 OpenCL/m09800_a0-optimized.cl   | 1 +
 OpenCL/m09800_a1-optimized.cl   | 1 +
 OpenCL/m09800_a3-optimized.cl   | 1 +
 OpenCL/m09810_a0-optimized.cl   | 1 +
 OpenCL/m09810_a1-optimized.cl   | 1 +
 OpenCL/m09810_a3-optimized.cl   | 1 +
 OpenCL/m09820_a0-optimized.cl   | 1 +
 OpenCL/m09820_a1-optimized.cl   | 1 +
 OpenCL/m09820_a3-optimized.cl   | 1 +
 OpenCL/m09900_a0-optimized.cl   | 1 +
 OpenCL/m09900_a0-pure.cl        | 1 +
 OpenCL/m09900_a1-optimized.cl   | 1 +
 OpenCL/m09900_a1-pure.cl        | 1 +
 OpenCL/m09900_a3-optimized.cl   | 1 +
 OpenCL/m09900_a3-pure.cl        | 1 +
 OpenCL/m10100_a0-optimized.cl   | 1 +
 OpenCL/m10100_a1-optimized.cl   | 1 +
 OpenCL/m10100_a3-optimized.cl   | 1 +
 OpenCL/m10300-pure.cl           | 1 +
 OpenCL/m10400_a0-optimized.cl   | 1 +
 OpenCL/m10400_a1-optimized.cl   | 1 +
 OpenCL/m10400_a3-optimized.cl   | 1 +
 OpenCL/m10410_a0-optimized.cl   | 1 +
 OpenCL/m10410_a1-optimized.cl   | 1 +
 OpenCL/m10410_a3-optimized.cl   | 1 +
 OpenCL/m10420_a0-optimized.cl   | 1 +
 OpenCL/m10420_a1-optimized.cl   | 1 +
 OpenCL/m10420_a3-optimized.cl   | 1 +
 OpenCL/m10500-pure.cl           | 1 +
 OpenCL/m10700-optimized.cl      | 1 +
 OpenCL/m10700-pure.cl           | 1 +
 OpenCL/m10800_a0-optimized.cl   | 1 +
 OpenCL/m10800_a0-pure.cl        | 1 +
 OpenCL/m10800_a1-optimized.cl   | 1 +
 OpenCL/m10800_a1-pure.cl        | 1 +
 OpenCL/m10800_a3-optimized.cl   | 1 +
 OpenCL/m10800_a3-pure.cl        | 1 +
 OpenCL/m10900-pure.cl           | 1 +
 OpenCL/m11000_a0-optimized.cl   | 1 +
 OpenCL/m11000_a0-pure.cl        | 1 +
 OpenCL/m11000_a1-optimized.cl   | 1 +
 OpenCL/m11000_a1-pure.cl        | 1 +
 OpenCL/m11000_a3-optimized.cl   | 1 +
 OpenCL/m11000_a3-pure.cl        | 1 +
 OpenCL/m11100_a0-optimized.cl   | 1 +
 OpenCL/m11100_a0-pure.cl        | 1 +
 OpenCL/m11100_a1-optimized.cl   | 1 +
 OpenCL/m11100_a1-pure.cl        | 1 +
 OpenCL/m11100_a3-optimized.cl   | 1 +
 OpenCL/m11100_a3-pure.cl        | 1 +
 OpenCL/m11200_a0-optimized.cl   | 1 +
 OpenCL/m11200_a0-pure.cl        | 1 +
 OpenCL/m11200_a1-optimized.cl   | 1 +
 OpenCL/m11200_a1-pure.cl        | 1 +
 OpenCL/m11200_a3-optimized.cl   | 1 +
 OpenCL/m11200_a3-pure.cl        | 1 +
 OpenCL/m11300-pure.cl           | 1 +
 OpenCL/m11400_a0-pure.cl        | 1 +
 OpenCL/m11400_a1-pure.cl        | 1 +
 OpenCL/m11400_a3-pure.cl        | 1 +
 OpenCL/m11500_a0-optimized.cl   | 1 +
 OpenCL/m11500_a1-optimized.cl   | 1 +
 OpenCL/m11500_a3-optimized.cl   | 1 +
 OpenCL/m11600-pure.cl           | 1 +
 OpenCL/m11700_a0-optimized.cl   | 1 +
 OpenCL/m11700_a0-pure.cl        | 1 +
 OpenCL/m11700_a1-optimized.cl   | 1 +
 OpenCL/m11700_a1-pure.cl        | 1 +
 OpenCL/m11700_a3-optimized.cl   | 1 +
 OpenCL/m11700_a3-pure.cl        | 1 +
 OpenCL/m11750_a0-pure.cl        | 1 +
 OpenCL/m11750_a1-pure.cl        | 1 +
 OpenCL/m11750_a3-pure.cl        | 1 +
 OpenCL/m11760_a0-pure.cl        | 1 +
 OpenCL/m11760_a1-pure.cl        | 1 +
 OpenCL/m11760_a3-pure.cl        | 1 +
 OpenCL/m11800_a0-optimized.cl   | 1 +
 OpenCL/m11800_a0-pure.cl        | 1 +
 OpenCL/m11800_a1-optimized.cl   | 1 +
 OpenCL/m11800_a1-pure.cl        | 1 +
 OpenCL/m11800_a3-optimized.cl   | 1 +
 OpenCL/m11800_a3-pure.cl        | 1 +
 OpenCL/m11850_a0-pure.cl        | 1 +
 OpenCL/m11850_a1-pure.cl        | 1 +
 OpenCL/m11850_a3-pure.cl        | 1 +
 OpenCL/m11860_a0-pure.cl        | 1 +
 OpenCL/m11860_a1-pure.cl        | 1 +
 OpenCL/m11860_a3-pure.cl        | 1 +
 OpenCL/m11900-pure.cl           | 1 +
 OpenCL/m12000-pure.cl           | 1 +
 OpenCL/m12200-pure.cl           | 1 +
 OpenCL/m12300-pure.cl           | 1 +
 OpenCL/m12400-pure.cl           | 1 +
 OpenCL/m12500-pure.cl           | 1 +
 OpenCL/m12600_a0-optimized.cl   | 1 +
 OpenCL/m12600_a0-pure.cl        | 1 +
 OpenCL/m12600_a1-optimized.cl   | 1 +
 OpenCL/m12600_a1-pure.cl        | 1 +
 OpenCL/m12600_a3-optimized.cl   | 1 +
 OpenCL/m12600_a3-pure.cl        | 1 +
 OpenCL/m12700-pure.cl           | 1 +
 OpenCL/m12800-pure.cl           | 1 +
 OpenCL/m12900-pure.cl           | 1 +
 OpenCL/m13000-pure.cl           | 1 +
 OpenCL/m13100_a0-optimized.cl   | 1 +
 OpenCL/m13100_a0-pure.cl        | 1 +
 OpenCL/m13100_a1-optimized.cl   | 1 +
 OpenCL/m13100_a1-pure.cl        | 1 +
 OpenCL/m13100_a3-optimized.cl   | 1 +
 OpenCL/m13100_a3-pure.cl        | 1 +
 OpenCL/m13200-pure.cl           | 1 +
 OpenCL/m13300_a0-optimized.cl   | 1 +
 OpenCL/m13300_a0-pure.cl        | 1 +
 OpenCL/m13300_a1-optimized.cl   | 1 +
 OpenCL/m13300_a1-pure.cl        | 1 +
 OpenCL/m13300_a3-optimized.cl   | 1 +
 OpenCL/m13300_a3-pure.cl        | 1 +
 OpenCL/m13400-pure.cl           | 1 +
 OpenCL/m13500_a0-optimized.cl   | 1 +
 OpenCL/m13500_a0-pure.cl        | 1 +
 OpenCL/m13500_a1-optimized.cl   | 1 +
 OpenCL/m13500_a1-pure.cl        | 1 +
 OpenCL/m13500_a3-optimized.cl   | 1 +
 OpenCL/m13500_a3-pure.cl        | 1 +
 OpenCL/m13600-pure.cl           | 1 +
 OpenCL/m13711-pure.cl           | 1 +
 OpenCL/m13712-pure.cl           | 1 +
 OpenCL/m13713-pure.cl           | 1 +
 OpenCL/m13721-pure.cl           | 1 +
 OpenCL/m13722-pure.cl           | 1 +
 OpenCL/m13723-pure.cl           | 1 +
 OpenCL/m13731-pure.cl           | 1 +
 OpenCL/m13732-pure.cl           | 1 +
 OpenCL/m13733-pure.cl           | 1 +
 OpenCL/m13751-pure.cl           | 1 +
 OpenCL/m13752-pure.cl           | 1 +
 OpenCL/m13753-pure.cl           | 1 +
 OpenCL/m13771-pure.cl           | 1 +
 OpenCL/m13772-pure.cl           | 1 +
 OpenCL/m13773-pure.cl           | 1 +
 OpenCL/m13800_a0-optimized.cl   | 1 +
 OpenCL/m13800_a0-pure.cl        | 1 +
 OpenCL/m13800_a1-optimized.cl   | 1 +
 OpenCL/m13800_a1-pure.cl        | 1 +
 OpenCL/m13800_a3-optimized.cl   | 1 +
 OpenCL/m13800_a3-pure.cl        | 1 +
 OpenCL/m13900_a0-optimized.cl   | 1 +
 OpenCL/m13900_a0-pure.cl        | 1 +
 OpenCL/m13900_a1-optimized.cl   | 1 +
 OpenCL/m13900_a1-pure.cl        | 1 +
 OpenCL/m13900_a3-optimized.cl   | 1 +
 OpenCL/m13900_a3-pure.cl        | 1 +
 OpenCL/m14000_a0-pure.cl        | 1 +
 OpenCL/m14000_a1-pure.cl        | 1 +
 OpenCL/m14000_a3-pure.cl        | 1 +
 OpenCL/m14100_a0-pure.cl        | 1 +
 OpenCL/m14100_a1-pure.cl        | 1 +
 OpenCL/m14100_a3-pure.cl        | 1 +
 OpenCL/m14400_a0-optimized.cl   | 1 +
 OpenCL/m14400_a0-pure.cl        | 1 +
 OpenCL/m14400_a1-optimized.cl   | 1 +
 OpenCL/m14400_a1-pure.cl        | 1 +
 OpenCL/m14400_a3-optimized.cl   | 1 +
 OpenCL/m14400_a3-pure.cl        | 1 +
 OpenCL/m14611-pure.cl           | 1 +
 OpenCL/m14612-pure.cl           | 1 +
 OpenCL/m14613-pure.cl           | 1 +
 OpenCL/m14621-pure.cl           | 1 +
 OpenCL/m14622-pure.cl           | 1 +
 OpenCL/m14623-pure.cl           | 1 +
 OpenCL/m14631-pure.cl           | 1 +
 OpenCL/m14632-pure.cl           | 1 +
 OpenCL/m14633-pure.cl           | 1 +
 OpenCL/m14641-pure.cl           | 1 +
 OpenCL/m14642-pure.cl           | 1 +
 OpenCL/m14643-pure.cl           | 1 +
 OpenCL/m14700-pure.cl           | 1 +
 OpenCL/m14800-pure.cl           | 1 +
 OpenCL/m14900_a0-optimized.cl   | 1 +
 OpenCL/m14900_a1-optimized.cl   | 1 +
 OpenCL/m14900_a3-optimized.cl   | 1 +
 OpenCL/m15000_a0-optimized.cl   | 1 +
 OpenCL/m15000_a0-pure.cl        | 1 +
 OpenCL/m15000_a1-optimized.cl   | 1 +
 OpenCL/m15000_a1-pure.cl        | 1 +
 OpenCL/m15000_a3-optimized.cl   | 1 +
 OpenCL/m15000_a3-pure.cl        | 1 +
 OpenCL/m15100-pure.cl           | 1 +
 OpenCL/m15300-pure.cl           | 1 +
 OpenCL/m15400_a0-optimized.cl   | 1 +
 OpenCL/m15400_a1-optimized.cl   | 1 +
 OpenCL/m15400_a3-optimized.cl   | 1 +
 OpenCL/m15500_a0-optimized.cl   | 1 +
 OpenCL/m15500_a0-pure.cl        | 1 +
 OpenCL/m15500_a1-optimized.cl   | 1 +
 OpenCL/m15500_a1-pure.cl        | 1 +
 OpenCL/m15500_a3-optimized.cl   | 1 +
 OpenCL/m15500_a3-pure.cl        | 1 +
 OpenCL/m15600-pure.cl           | 1 +
 OpenCL/m15700-pure.cl           | 1 +
 OpenCL/m15900-pure.cl           | 1 +
 OpenCL/m16000_a0-pure.cl        | 1 +
 OpenCL/m16000_a1-pure.cl        | 1 +
 OpenCL/m16000_a3-pure.cl        | 1 +
 OpenCL/m16100_a0-optimized.cl   | 1 +
 OpenCL/m16100_a0-pure.cl        | 1 +
 OpenCL/m16100_a1-optimized.cl   | 1 +
 OpenCL/m16100_a1-pure.cl        | 1 +
 OpenCL/m16100_a3-optimized.cl   | 1 +
 OpenCL/m16100_a3-pure.cl        | 1 +
 OpenCL/m16200-pure.cl           | 1 +
 OpenCL/m16300-pure.cl           | 1 +
 OpenCL/m16400_a0-optimized.cl   | 1 +
 OpenCL/m16400_a0-pure.cl        | 1 +
 OpenCL/m16400_a1-optimized.cl   | 1 +
 OpenCL/m16400_a1-pure.cl        | 1 +
 OpenCL/m16400_a3-optimized.cl   | 1 +
 OpenCL/m16400_a3-pure.cl        | 1 +
 OpenCL/m16511_a0-pure.cl        | 1 +
 OpenCL/m16511_a1-pure.cl        | 1 +
 OpenCL/m16511_a3-pure.cl        | 1 +
 OpenCL/m16512_a0-pure.cl        | 1 +
 OpenCL/m16512_a1-pure.cl        | 1 +
 OpenCL/m16512_a3-pure.cl        | 1 +
 OpenCL/m16513_a0-pure.cl        | 1 +
 OpenCL/m16513_a1-pure.cl        | 1 +
 OpenCL/m16513_a3-pure.cl        | 1 +
 OpenCL/m16600_a0-optimized.cl   | 1 +
 OpenCL/m16600_a0-pure.cl        | 1 +
 OpenCL/m16600_a1-optimized.cl   | 1 +
 OpenCL/m16600_a1-pure.cl        | 1 +
 OpenCL/m16600_a3-optimized.cl   | 1 +
 OpenCL/m16600_a3-pure.cl        | 1 +
 OpenCL/m16800-pure.cl           | 2 ++
 OpenCL/m16801-pure.cl           | 2 ++
 OpenCL/m16900-pure.cl           | 1 +
 OpenCL/m17300_a0-optimized.cl   | 1 +
 OpenCL/m17300_a1-optimized.cl   | 1 +
 OpenCL/m17300_a3-optimized.cl   | 1 +
 OpenCL/m17400_a0-optimized.cl   | 1 +
 OpenCL/m17400_a1-optimized.cl   | 1 +
 OpenCL/m17400_a3-optimized.cl   | 1 +
 OpenCL/m17500_a0-optimized.cl   | 1 +
 OpenCL/m17500_a1-optimized.cl   | 1 +
 OpenCL/m17500_a3-optimized.cl   | 1 +
 OpenCL/m17600_a0-optimized.cl   | 1 +
 OpenCL/m17600_a1-optimized.cl   | 1 +
 OpenCL/m17600_a3-optimized.cl   | 1 +
 OpenCL/m17700_a0-optimized.cl   | 1 +
 OpenCL/m17700_a1-optimized.cl   | 1 +
 OpenCL/m17700_a3-optimized.cl   | 1 +
 OpenCL/m17800_a0-optimized.cl   | 1 +
 OpenCL/m17800_a1-optimized.cl   | 1 +
 OpenCL/m17800_a3-optimized.cl   | 1 +
 OpenCL/m17900_a0-optimized.cl   | 1 +
 OpenCL/m17900_a1-optimized.cl   | 1 +
 OpenCL/m17900_a3-optimized.cl   | 1 +
 OpenCL/m18000_a0-optimized.cl   | 1 +
 OpenCL/m18000_a1-optimized.cl   | 1 +
 OpenCL/m18000_a3-optimized.cl   | 1 +
 OpenCL/m18100_a0-pure.cl        | 1 +
 OpenCL/m18100_a1-pure.cl        | 1 +
 OpenCL/m18100_a3-pure.cl        | 1 +
 OpenCL/m18200_a0-optimized.cl   | 1 +
 OpenCL/m18200_a0-pure.cl        | 1 +
 OpenCL/m18200_a1-optimized.cl   | 1 +
 OpenCL/m18200_a1-pure.cl        | 1 +
 OpenCL/m18200_a3-optimized.cl   | 1 +
 OpenCL/m18200_a3-pure.cl        | 1 +
 OpenCL/m18300-pure.cl           | 1 +
 OpenCL/m18400-pure.cl           | 1 +
 OpenCL/m18500_a0-pure.cl        | 1 +
 OpenCL/m18500_a1-pure.cl        | 1 +
 OpenCL/m18500_a3-pure.cl        | 1 +
 OpenCL/m18600-pure.cl           | 1 +
 OpenCL/m18700_a0-optimized.cl   | 1 +
 OpenCL/m18700_a0-pure.cl        | 1 +
 OpenCL/m18700_a1-optimized.cl   | 1 +
 OpenCL/m18700_a1-pure.cl        | 1 +
 OpenCL/m18700_a3-optimized.cl   | 1 +
 OpenCL/m18700_a3-pure.cl        | 1 +
 OpenCL/m18800-pure.cl           | 1 +
 OpenCL/m18900-pure.cl           | 1 +
 OpenCL/m19000-pure.cl           | 1 +
 OpenCL/m19100-pure.cl           | 1 +
 OpenCL/m19200-pure.cl           | 1 +
 OpenCL/m19300_a0-pure.cl        | 1 +
 OpenCL/m19300_a1-pure.cl        | 1 +
 OpenCL/m19300_a3-pure.cl        | 1 +
 OpenCL/m19500_a0-pure.cl        | 1 +
 OpenCL/m19500_a1-pure.cl        | 1 +
 OpenCL/m19500_a3-pure.cl        | 1 +
 OpenCL/m19600-pure.cl           | 1 +
 OpenCL/m19700-pure.cl           | 1 +
 OpenCL/m19800-pure.cl           | 1 +
 OpenCL/m19900-pure.cl           | 1 +
 OpenCL/m20011-pure.cl           | 1 +
 OpenCL/m20012-pure.cl           | 1 +
 OpenCL/m20013-pure.cl           | 1 +
 OpenCL/markov_be.cl             | 1 +
 OpenCL/markov_le.cl             | 1 +
 802 files changed, 807 insertions(+), 2 deletions(-)

diff --git a/OpenCL/amp_a0.cl b/OpenCL/amp_a0.cl
index 2ae36b581..e09e3f6d8 100644
--- a/OpenCL/amp_a0.cl
+++ b/OpenCL/amp_a0.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/amp_a1.cl b/OpenCL/amp_a1.cl
index 5dda031bf..5ab241211 100644
--- a/OpenCL/amp_a1.cl
+++ b/OpenCL/amp_a1.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/amp_a3.cl b/OpenCL/amp_a3.cl
index d7ce0ea77..075e3c6f3 100644
--- a/OpenCL/amp_a3.cl
+++ b/OpenCL/amp_a3.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #endif
 
 KERNEL_FQ void amp (GLOBAL_AS pw_t *pws, GLOBAL_AS pw_t *pws_amp, GLOBAL_AS const kernel_rule_t *rules_buf, GLOBAL_AS const pw_t *combs_buf, CONSTANT_AS bf_t *bfs_buf, const u32 combs_mode, const u64 gid_max)
diff --git a/OpenCL/inc_cipher_aes.cl b/OpenCL/inc_cipher_aes.cl
index 4ca9f937e..8425414cd 100644
--- a/OpenCL/inc_cipher_aes.cl
+++ b/OpenCL/inc_cipher_aes.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_aes.h"
 
diff --git a/OpenCL/inc_cipher_camellia.cl b/OpenCL/inc_cipher_camellia.cl
index 7e08c163b..881e541bf 100644
--- a/OpenCL/inc_cipher_camellia.cl
+++ b/OpenCL/inc_cipher_camellia.cl
@@ -17,6 +17,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_camellia.h"
 
diff --git a/OpenCL/inc_cipher_des.cl b/OpenCL/inc_cipher_des.cl
index ec8a1e611..a90d5788b 100644
--- a/OpenCL/inc_cipher_des.cl
+++ b/OpenCL/inc_cipher_des.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_des.h"
 
diff --git a/OpenCL/inc_cipher_kuznyechik.cl b/OpenCL/inc_cipher_kuznyechik.cl
index 6147bf8d0..2c3792b96 100644
--- a/OpenCL/inc_cipher_kuznyechik.cl
+++ b/OpenCL/inc_cipher_kuznyechik.cl
@@ -14,6 +14,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_kuznyechik.h"
 
diff --git a/OpenCL/inc_cipher_serpent.cl b/OpenCL/inc_cipher_serpent.cl
index 5bdb3c3d4..b4e21e2e0 100644
--- a/OpenCL/inc_cipher_serpent.cl
+++ b/OpenCL/inc_cipher_serpent.cl
@@ -18,6 +18,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_serpent.h"
 
diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl
index 2875f5fa9..90b6cf600 100644
--- a/OpenCL/inc_cipher_twofish.cl
+++ b/OpenCL/inc_cipher_twofish.cl
@@ -21,6 +21,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_twofish.h"
 
diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 844e4ba12..a74fd2a2a 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_platform.h"
 #include "inc_common.h"
 
diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl
index 3b9113907..28720b25c 100644
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_md4.h"
 
diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl
index 66c58d282..2fee96f96 100644
--- a/OpenCL/inc_hash_md5.cl
+++ b/OpenCL/inc_hash_md5.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_md5.h"
 
diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl
index 73715e9ea..703f0dcc8 100644
--- a/OpenCL/inc_hash_ripemd160.cl
+++ b/OpenCL/inc_hash_ripemd160.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_ripemd160.h"
 
diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl
index dcc6ceb04..6ec45c6ba 100644
--- a/OpenCL/inc_hash_sha1.cl
+++ b/OpenCL/inc_hash_sha1.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha1.h"
 
diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index ed4f81b0f..0758b1f54 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha224.h"
 
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index 464984b15..f52e551da 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha256.h"
 
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index 8817ab772..cdfa357ca 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha384.h"
 
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index 02e2a41df..9ea5463d8 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha512.h"
 
diff --git a/OpenCL/inc_hash_streebog256.cl b/OpenCL/inc_hash_streebog256.cl
index e14ad4007..58900e7d7 100644
--- a/OpenCL/inc_hash_streebog256.cl
+++ b/OpenCL/inc_hash_streebog256.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_streebog256.h"
 
diff --git a/OpenCL/inc_hash_streebog512.cl b/OpenCL/inc_hash_streebog512.cl
index e32c4169d..7ad416ade 100644
--- a/OpenCL/inc_hash_streebog512.cl
+++ b/OpenCL/inc_hash_streebog512.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_streebog512.h"
 
diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl
index 0b61a7b9d..d4e1206c4 100644
--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_whirlpool.h"
 
diff --git a/OpenCL/inc_luks_aes.cl b/OpenCL/inc_luks_aes.cl
index 6306edd94..8406ec1eb 100644
--- a/OpenCL/inc_luks_aes.cl
+++ b/OpenCL/inc_luks_aes.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_aes.h"
 #include "inc_luks_af.h"
diff --git a/OpenCL/inc_luks_af.cl b/OpenCL/inc_luks_af.cl
index 058143ca2..53a03e8cf 100644
--- a/OpenCL/inc_luks_af.cl
+++ b/OpenCL/inc_luks_af.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_ripemd160.h"
 #include "inc_hash_sha1.h"
diff --git a/OpenCL/inc_luks_essiv.cl b/OpenCL/inc_luks_essiv.cl
index 4465617ab..151284bd0 100644
--- a/OpenCL/inc_luks_essiv.cl
+++ b/OpenCL/inc_luks_essiv.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_hash_sha256.h"
 #include "inc_luks_essiv.h"
diff --git a/OpenCL/inc_luks_serpent.cl b/OpenCL/inc_luks_serpent.cl
index d4b1cf96c..1f75fc6a7 100644
--- a/OpenCL/inc_luks_serpent.cl
+++ b/OpenCL/inc_luks_serpent.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_luks_serpent.h"
 
diff --git a/OpenCL/inc_luks_twofish.cl b/OpenCL/inc_luks_twofish.cl
index 9c496f2cf..2d2d7d34c 100644
--- a/OpenCL/inc_luks_twofish.cl
+++ b/OpenCL/inc_luks_twofish.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_luks_twofish.h"
 
diff --git a/OpenCL/inc_luks_xts.cl b/OpenCL/inc_luks_xts.cl
index 5bd6e6410..a3b87abcc 100644
--- a/OpenCL/inc_luks_xts.cl
+++ b/OpenCL/inc_luks_xts.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_luks_xts.h"
 
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index ceb12a4f1..76ac96b6c 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_platform.h"
 
 #ifdef IS_NATIVE
diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl
index ffa84f4ab..5c3905bdd 100644
--- a/OpenCL/inc_rp.cl
+++ b/OpenCL/inc_rp.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_rp.h"
 
diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl
index 53a5d4d38..8bfbe8b0a 100644
--- a/OpenCL/inc_rp_optimized.cl
+++ b/OpenCL/inc_rp_optimized.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_rp_optimized.h"
 
diff --git a/OpenCL/inc_simd.cl b/OpenCL/inc_simd.cl
index 26f413979..50bafeb68 100644
--- a/OpenCL/inc_simd.cl
+++ b/OpenCL/inc_simd.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_simd.h"
 
diff --git a/OpenCL/inc_truecrypt_crc32.cl b/OpenCL/inc_truecrypt_crc32.cl
index db0204b12..391ec91d0 100644
--- a/OpenCL/inc_truecrypt_crc32.cl
+++ b/OpenCL/inc_truecrypt_crc32.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_truecrypt_crc32.h"
 
diff --git a/OpenCL/inc_truecrypt_keyfile.cl b/OpenCL/inc_truecrypt_keyfile.cl
index faf287b5e..4822c811a 100644
--- a/OpenCL/inc_truecrypt_keyfile.cl
+++ b/OpenCL/inc_truecrypt_keyfile.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_truecrypt_keyfile.h"
 
diff --git a/OpenCL/inc_truecrypt_xts.cl b/OpenCL/inc_truecrypt_xts.cl
index 4e72d4e79..ceeff8842 100644
--- a/OpenCL/inc_truecrypt_xts.cl
+++ b/OpenCL/inc_truecrypt_xts.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_aes.h"
 #include "inc_cipher_serpent.h"
diff --git a/OpenCL/inc_veracrypt_xts.cl b/OpenCL/inc_veracrypt_xts.cl
index 8709db662..ba3b7eef5 100644
--- a/OpenCL/inc_veracrypt_xts.cl
+++ b/OpenCL/inc_veracrypt_xts.cl
@@ -5,6 +5,7 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_cipher_aes.h"
 #include "inc_cipher_serpent.h"
diff --git a/OpenCL/m00000_a0-optimized.cl b/OpenCL/m00000_a0-optimized.cl
index c7c4fec28..da224b637 100644
--- a/OpenCL/m00000_a0-optimized.cl
+++ b/OpenCL/m00000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00000_a0-pure.cl b/OpenCL/m00000_a0-pure.cl
index 2ed90f5c1..ea2699153 100644
--- a/OpenCL/m00000_a0-pure.cl
+++ b/OpenCL/m00000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00000_a1-optimized.cl b/OpenCL/m00000_a1-optimized.cl
index 50fa4a568..4b49148a2 100644
--- a/OpenCL/m00000_a1-optimized.cl
+++ b/OpenCL/m00000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_simd.cl"
diff --git a/OpenCL/m00000_a1-pure.cl b/OpenCL/m00000_a1-pure.cl
index eacb3910d..5bbb281c8 100644
--- a/OpenCL/m00000_a1-pure.cl
+++ b/OpenCL/m00000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00000_a3-optimized.cl b/OpenCL/m00000_a3-optimized.cl
index ef27d52f1..1a7fb4f9a 100644
--- a/OpenCL/m00000_a3-optimized.cl
+++ b/OpenCL/m00000_a3-optimized.cl
@@ -17,7 +17,7 @@
 #define MD5_STEP_REV(f,a,b,c,d,x,t,s)   \
 {                                       \
   a -= b;                               \
-  a  = hc_rotr32_S (a, s);                 \
+  a  = hc_rotr32_S (a, s);              \
   a -= f (b, c, d);                     \
   a -= x;                               \
   a -= t;                               \
@@ -26,7 +26,7 @@
 #define MD5_STEP_REV1(f,a,b,c,d,x,t,s)  \
 {                                       \
   a -= b;                               \
-  a  = hc_rotr32_S (a, s);                 \
+  a  = hc_rotr32_S (a, s);              \
   a -= x;                               \
   a -= t;                               \
 }
diff --git a/OpenCL/m00000_a3-pure.cl b/OpenCL/m00000_a3-pure.cl
index ee6072389..9ff74dcc3 100644
--- a/OpenCL/m00000_a3-pure.cl
+++ b/OpenCL/m00000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00010_a0-optimized.cl b/OpenCL/m00010_a0-optimized.cl
index 7859648fb..e735558df 100644
--- a/OpenCL/m00010_a0-optimized.cl
+++ b/OpenCL/m00010_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00010_a0-pure.cl b/OpenCL/m00010_a0-pure.cl
index 1b9765de2..047a01c9a 100644
--- a/OpenCL/m00010_a0-pure.cl
+++ b/OpenCL/m00010_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00010_a1-optimized.cl b/OpenCL/m00010_a1-optimized.cl
index 14e5ab986..ece0ac663 100644
--- a/OpenCL/m00010_a1-optimized.cl
+++ b/OpenCL/m00010_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00010_a1-pure.cl b/OpenCL/m00010_a1-pure.cl
index fbc397d7d..14f0c9271 100644
--- a/OpenCL/m00010_a1-pure.cl
+++ b/OpenCL/m00010_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00010_a3-optimized.cl b/OpenCL/m00010_a3-optimized.cl
index 0367d7cfe..a101428b1 100644
--- a/OpenCL/m00010_a3-optimized.cl
+++ b/OpenCL/m00010_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00010_a3-pure.cl b/OpenCL/m00010_a3-pure.cl
index 362207256..caa63b0b5 100644
--- a/OpenCL/m00010_a3-pure.cl
+++ b/OpenCL/m00010_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00020_a0-optimized.cl b/OpenCL/m00020_a0-optimized.cl
index 9e1445335..7becd0173 100644
--- a/OpenCL/m00020_a0-optimized.cl
+++ b/OpenCL/m00020_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00020_a0-pure.cl b/OpenCL/m00020_a0-pure.cl
index ee18c5f6b..43eb1158b 100644
--- a/OpenCL/m00020_a0-pure.cl
+++ b/OpenCL/m00020_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00020_a1-optimized.cl b/OpenCL/m00020_a1-optimized.cl
index dc37febfd..536b9ba16 100644
--- a/OpenCL/m00020_a1-optimized.cl
+++ b/OpenCL/m00020_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00020_a1-pure.cl b/OpenCL/m00020_a1-pure.cl
index 10b96807d..818d352aa 100644
--- a/OpenCL/m00020_a1-pure.cl
+++ b/OpenCL/m00020_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00020_a3-optimized.cl b/OpenCL/m00020_a3-optimized.cl
index 1dc9be105..939ce5710 100644
--- a/OpenCL/m00020_a3-optimized.cl
+++ b/OpenCL/m00020_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00020_a3-pure.cl b/OpenCL/m00020_a3-pure.cl
index d937f3f82..5cc9805f4 100644
--- a/OpenCL/m00020_a3-pure.cl
+++ b/OpenCL/m00020_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00030_a0-optimized.cl b/OpenCL/m00030_a0-optimized.cl
index 7b9cd9516..f6ba857a3 100644
--- a/OpenCL/m00030_a0-optimized.cl
+++ b/OpenCL/m00030_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00030_a0-pure.cl b/OpenCL/m00030_a0-pure.cl
index ac7ebbd6d..22f4ae6a7 100644
--- a/OpenCL/m00030_a0-pure.cl
+++ b/OpenCL/m00030_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00030_a1-optimized.cl b/OpenCL/m00030_a1-optimized.cl
index f67c54811..e40e653db 100644
--- a/OpenCL/m00030_a1-optimized.cl
+++ b/OpenCL/m00030_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00030_a1-pure.cl b/OpenCL/m00030_a1-pure.cl
index ac7582c66..fb974b0a2 100644
--- a/OpenCL/m00030_a1-pure.cl
+++ b/OpenCL/m00030_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00030_a3-optimized.cl b/OpenCL/m00030_a3-optimized.cl
index a1505af17..3c5a808f9 100644
--- a/OpenCL/m00030_a3-optimized.cl
+++ b/OpenCL/m00030_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00030_a3-pure.cl b/OpenCL/m00030_a3-pure.cl
index dedf84f53..053896651 100644
--- a/OpenCL/m00030_a3-pure.cl
+++ b/OpenCL/m00030_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00040_a0-optimized.cl b/OpenCL/m00040_a0-optimized.cl
index 87a03f7de..d04315627 100644
--- a/OpenCL/m00040_a0-optimized.cl
+++ b/OpenCL/m00040_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00040_a0-pure.cl b/OpenCL/m00040_a0-pure.cl
index d13e699e0..ef4f774f0 100644
--- a/OpenCL/m00040_a0-pure.cl
+++ b/OpenCL/m00040_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00040_a1-optimized.cl b/OpenCL/m00040_a1-optimized.cl
index 10031fee8..c5e5d3002 100644
--- a/OpenCL/m00040_a1-optimized.cl
+++ b/OpenCL/m00040_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00040_a1-pure.cl b/OpenCL/m00040_a1-pure.cl
index e32823a40..842ac0fd8 100644
--- a/OpenCL/m00040_a1-pure.cl
+++ b/OpenCL/m00040_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00040_a3-optimized.cl b/OpenCL/m00040_a3-optimized.cl
index bdfa5f16c..bb9bc38a6 100644
--- a/OpenCL/m00040_a3-optimized.cl
+++ b/OpenCL/m00040_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00040_a3-pure.cl b/OpenCL/m00040_a3-pure.cl
index 4da1b7438..ca7d7b843 100644
--- a/OpenCL/m00040_a3-pure.cl
+++ b/OpenCL/m00040_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00050_a0-optimized.cl b/OpenCL/m00050_a0-optimized.cl
index 944006caa..f94854ed8 100644
--- a/OpenCL/m00050_a0-optimized.cl
+++ b/OpenCL/m00050_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00050_a0-pure.cl b/OpenCL/m00050_a0-pure.cl
index 38fba8341..8ad06fa3b 100644
--- a/OpenCL/m00050_a0-pure.cl
+++ b/OpenCL/m00050_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00050_a1-optimized.cl b/OpenCL/m00050_a1-optimized.cl
index 6cebd87a4..650aec269 100644
--- a/OpenCL/m00050_a1-optimized.cl
+++ b/OpenCL/m00050_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00050_a1-pure.cl b/OpenCL/m00050_a1-pure.cl
index 927d48fcb..0cd0bd0f0 100644
--- a/OpenCL/m00050_a1-pure.cl
+++ b/OpenCL/m00050_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00050_a3-optimized.cl b/OpenCL/m00050_a3-optimized.cl
index a19259f1f..f3d733ceb 100644
--- a/OpenCL/m00050_a3-optimized.cl
+++ b/OpenCL/m00050_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00050_a3-pure.cl b/OpenCL/m00050_a3-pure.cl
index 3e0adaee3..693d3862f 100644
--- a/OpenCL/m00050_a3-pure.cl
+++ b/OpenCL/m00050_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00060_a0-optimized.cl b/OpenCL/m00060_a0-optimized.cl
index e9656ced9..7546e66d2 100644
--- a/OpenCL/m00060_a0-optimized.cl
+++ b/OpenCL/m00060_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00060_a0-pure.cl b/OpenCL/m00060_a0-pure.cl
index 9efc3ea1e..dfa8c6205 100644
--- a/OpenCL/m00060_a0-pure.cl
+++ b/OpenCL/m00060_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00060_a1-optimized.cl b/OpenCL/m00060_a1-optimized.cl
index 25159ee25..2bc59801c 100644
--- a/OpenCL/m00060_a1-optimized.cl
+++ b/OpenCL/m00060_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00060_a1-pure.cl b/OpenCL/m00060_a1-pure.cl
index fa247bfc8..db7376a83 100644
--- a/OpenCL/m00060_a1-pure.cl
+++ b/OpenCL/m00060_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00060_a3-optimized.cl b/OpenCL/m00060_a3-optimized.cl
index b46767385..1e051b965 100644
--- a/OpenCL/m00060_a3-optimized.cl
+++ b/OpenCL/m00060_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00060_a3-pure.cl b/OpenCL/m00060_a3-pure.cl
index c44f0ea10..73f2302a9 100644
--- a/OpenCL/m00060_a3-pure.cl
+++ b/OpenCL/m00060_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00100_a0-optimized.cl b/OpenCL/m00100_a0-optimized.cl
index 1f91fc49a..70363b391 100644
--- a/OpenCL/m00100_a0-optimized.cl
+++ b/OpenCL/m00100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00100_a0-pure.cl b/OpenCL/m00100_a0-pure.cl
index 3b6068af9..82e4d7b62 100644
--- a/OpenCL/m00100_a0-pure.cl
+++ b/OpenCL/m00100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00100_a1-optimized.cl b/OpenCL/m00100_a1-optimized.cl
index 2dade3f9a..ab46a7c10 100644
--- a/OpenCL/m00100_a1-optimized.cl
+++ b/OpenCL/m00100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00100_a1-pure.cl b/OpenCL/m00100_a1-pure.cl
index 2f0114d61..807ddc5ce 100644
--- a/OpenCL/m00100_a1-pure.cl
+++ b/OpenCL/m00100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00100_a3-optimized.cl b/OpenCL/m00100_a3-optimized.cl
index 7584036a7..6e01fc490 100644
--- a/OpenCL/m00100_a3-optimized.cl
+++ b/OpenCL/m00100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00100_a3-pure.cl b/OpenCL/m00100_a3-pure.cl
index 881f45ee4..32189c46f 100644
--- a/OpenCL/m00100_a3-pure.cl
+++ b/OpenCL/m00100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00110_a0-optimized.cl b/OpenCL/m00110_a0-optimized.cl
index 475409428..d210c5f11 100644
--- a/OpenCL/m00110_a0-optimized.cl
+++ b/OpenCL/m00110_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00110_a0-pure.cl b/OpenCL/m00110_a0-pure.cl
index 5103f8a56..1e3cc3c58 100644
--- a/OpenCL/m00110_a0-pure.cl
+++ b/OpenCL/m00110_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00110_a1-optimized.cl b/OpenCL/m00110_a1-optimized.cl
index ec9810df8..57a87affd 100644
--- a/OpenCL/m00110_a1-optimized.cl
+++ b/OpenCL/m00110_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00110_a1-pure.cl b/OpenCL/m00110_a1-pure.cl
index 294def602..ca1e2285d 100644
--- a/OpenCL/m00110_a1-pure.cl
+++ b/OpenCL/m00110_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00110_a3-optimized.cl b/OpenCL/m00110_a3-optimized.cl
index 9dcc6624c..187f5b658 100644
--- a/OpenCL/m00110_a3-optimized.cl
+++ b/OpenCL/m00110_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00110_a3-pure.cl b/OpenCL/m00110_a3-pure.cl
index c17b03694..8f3662640 100644
--- a/OpenCL/m00110_a3-pure.cl
+++ b/OpenCL/m00110_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00120_a0-optimized.cl b/OpenCL/m00120_a0-optimized.cl
index 75527d42f..18c94f7be 100644
--- a/OpenCL/m00120_a0-optimized.cl
+++ b/OpenCL/m00120_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00120_a0-pure.cl b/OpenCL/m00120_a0-pure.cl
index 653e2d0f8..3af320dfa 100644
--- a/OpenCL/m00120_a0-pure.cl
+++ b/OpenCL/m00120_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00120_a1-optimized.cl b/OpenCL/m00120_a1-optimized.cl
index 486297953..f5c28c971 100644
--- a/OpenCL/m00120_a1-optimized.cl
+++ b/OpenCL/m00120_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00120_a1-pure.cl b/OpenCL/m00120_a1-pure.cl
index c2c5d1f59..ef1256f90 100644
--- a/OpenCL/m00120_a1-pure.cl
+++ b/OpenCL/m00120_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00120_a3-optimized.cl b/OpenCL/m00120_a3-optimized.cl
index 2864927bd..4e57defe7 100644
--- a/OpenCL/m00120_a3-optimized.cl
+++ b/OpenCL/m00120_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00120_a3-pure.cl b/OpenCL/m00120_a3-pure.cl
index 6d5cf8c7c..d5bc699d3 100644
--- a/OpenCL/m00120_a3-pure.cl
+++ b/OpenCL/m00120_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00130_a0-optimized.cl b/OpenCL/m00130_a0-optimized.cl
index becbbbc52..2712d0b23 100644
--- a/OpenCL/m00130_a0-optimized.cl
+++ b/OpenCL/m00130_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00130_a0-pure.cl b/OpenCL/m00130_a0-pure.cl
index 9a8493ffa..b9214191f 100644
--- a/OpenCL/m00130_a0-pure.cl
+++ b/OpenCL/m00130_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00130_a1-optimized.cl b/OpenCL/m00130_a1-optimized.cl
index 64b2505f7..24d30df82 100644
--- a/OpenCL/m00130_a1-optimized.cl
+++ b/OpenCL/m00130_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00130_a1-pure.cl b/OpenCL/m00130_a1-pure.cl
index 69f9b7de2..a042b6a68 100644
--- a/OpenCL/m00130_a1-pure.cl
+++ b/OpenCL/m00130_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00130_a3-optimized.cl b/OpenCL/m00130_a3-optimized.cl
index c8dd293a8..28bbbb5e2 100644
--- a/OpenCL/m00130_a3-optimized.cl
+++ b/OpenCL/m00130_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00130_a3-pure.cl b/OpenCL/m00130_a3-pure.cl
index 00e20212c..d86d1b541 100644
--- a/OpenCL/m00130_a3-pure.cl
+++ b/OpenCL/m00130_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00140_a0-optimized.cl b/OpenCL/m00140_a0-optimized.cl
index 32eeea008..52da035e7 100644
--- a/OpenCL/m00140_a0-optimized.cl
+++ b/OpenCL/m00140_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00140_a0-pure.cl b/OpenCL/m00140_a0-pure.cl
index d1a14354f..54a7f92c8 100644
--- a/OpenCL/m00140_a0-pure.cl
+++ b/OpenCL/m00140_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00140_a1-optimized.cl b/OpenCL/m00140_a1-optimized.cl
index 1019236d7..bbe818983 100644
--- a/OpenCL/m00140_a1-optimized.cl
+++ b/OpenCL/m00140_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00140_a1-pure.cl b/OpenCL/m00140_a1-pure.cl
index 902636b85..e1a820a26 100644
--- a/OpenCL/m00140_a1-pure.cl
+++ b/OpenCL/m00140_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00140_a3-optimized.cl b/OpenCL/m00140_a3-optimized.cl
index 8db77efc5..3693569a1 100644
--- a/OpenCL/m00140_a3-optimized.cl
+++ b/OpenCL/m00140_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00140_a3-pure.cl b/OpenCL/m00140_a3-pure.cl
index f68773c30..a6c7a12fd 100644
--- a/OpenCL/m00140_a3-pure.cl
+++ b/OpenCL/m00140_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00150_a0-optimized.cl b/OpenCL/m00150_a0-optimized.cl
index 65e8267bd..4e4d4c822 100644
--- a/OpenCL/m00150_a0-optimized.cl
+++ b/OpenCL/m00150_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00150_a0-pure.cl b/OpenCL/m00150_a0-pure.cl
index 47d679903..1e9bf4e27 100644
--- a/OpenCL/m00150_a0-pure.cl
+++ b/OpenCL/m00150_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00150_a1-optimized.cl b/OpenCL/m00150_a1-optimized.cl
index 3c2274d21..7ced69a81 100644
--- a/OpenCL/m00150_a1-optimized.cl
+++ b/OpenCL/m00150_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00150_a1-pure.cl b/OpenCL/m00150_a1-pure.cl
index 24153ff0f..9278eba7d 100644
--- a/OpenCL/m00150_a1-pure.cl
+++ b/OpenCL/m00150_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00150_a3-optimized.cl b/OpenCL/m00150_a3-optimized.cl
index 45d124de5..914ea49a1 100644
--- a/OpenCL/m00150_a3-optimized.cl
+++ b/OpenCL/m00150_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00150_a3-pure.cl b/OpenCL/m00150_a3-pure.cl
index f4703db48..a77d52177 100644
--- a/OpenCL/m00150_a3-pure.cl
+++ b/OpenCL/m00150_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00160_a0-optimized.cl b/OpenCL/m00160_a0-optimized.cl
index 7a1be5004..eabf353d8 100644
--- a/OpenCL/m00160_a0-optimized.cl
+++ b/OpenCL/m00160_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00160_a0-pure.cl b/OpenCL/m00160_a0-pure.cl
index 5ae8c69d1..700766036 100644
--- a/OpenCL/m00160_a0-pure.cl
+++ b/OpenCL/m00160_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00160_a1-optimized.cl b/OpenCL/m00160_a1-optimized.cl
index 894c31530..bbf9c1a4e 100644
--- a/OpenCL/m00160_a1-optimized.cl
+++ b/OpenCL/m00160_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00160_a1-pure.cl b/OpenCL/m00160_a1-pure.cl
index 39a1a1c10..0b7b9a97d 100644
--- a/OpenCL/m00160_a1-pure.cl
+++ b/OpenCL/m00160_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00160_a3-optimized.cl b/OpenCL/m00160_a3-optimized.cl
index ff0e7823a..e228783a4 100644
--- a/OpenCL/m00160_a3-optimized.cl
+++ b/OpenCL/m00160_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00160_a3-pure.cl b/OpenCL/m00160_a3-pure.cl
index 24fb6e0ef..3d1e1e650 100644
--- a/OpenCL/m00160_a3-pure.cl
+++ b/OpenCL/m00160_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00200_a0-optimized.cl b/OpenCL/m00200_a0-optimized.cl
index 88144ff0a..c5d334f36 100644
--- a/OpenCL/m00200_a0-optimized.cl
+++ b/OpenCL/m00200_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00200_a1-optimized.cl b/OpenCL/m00200_a1-optimized.cl
index 0d907e343..5589a386a 100644
--- a/OpenCL/m00200_a1-optimized.cl
+++ b/OpenCL/m00200_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m00200_a3-optimized.cl b/OpenCL/m00200_a3-optimized.cl
index 61481aa69..74a1c3234 100644
--- a/OpenCL/m00200_a3-optimized.cl
+++ b/OpenCL/m00200_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m00300_a0-optimized.cl b/OpenCL/m00300_a0-optimized.cl
index df387712d..22294dd64 100644
--- a/OpenCL/m00300_a0-optimized.cl
+++ b/OpenCL/m00300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00300_a0-pure.cl b/OpenCL/m00300_a0-pure.cl
index 65209c666..4dcff77d8 100644
--- a/OpenCL/m00300_a0-pure.cl
+++ b/OpenCL/m00300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00300_a1-optimized.cl b/OpenCL/m00300_a1-optimized.cl
index 465d60418..03dfeb1c5 100644
--- a/OpenCL/m00300_a1-optimized.cl
+++ b/OpenCL/m00300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00300_a1-pure.cl b/OpenCL/m00300_a1-pure.cl
index 0d0614358..63728805b 100644
--- a/OpenCL/m00300_a1-pure.cl
+++ b/OpenCL/m00300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00300_a3-optimized.cl b/OpenCL/m00300_a3-optimized.cl
index 4fc9de868..2ae4ec0e9 100644
--- a/OpenCL/m00300_a3-optimized.cl
+++ b/OpenCL/m00300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00300_a3-pure.cl b/OpenCL/m00300_a3-pure.cl
index 4d4865da2..333354daf 100644
--- a/OpenCL/m00300_a3-pure.cl
+++ b/OpenCL/m00300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m00400-optimized.cl b/OpenCL/m00400-optimized.cl
index 56c4c3956..a66c7b02d 100644
--- a/OpenCL/m00400-optimized.cl
+++ b/OpenCL/m00400-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00400-pure.cl b/OpenCL/m00400-pure.cl
index c251a0380..5eeb0b36d 100644
--- a/OpenCL/m00400-pure.cl
+++ b/OpenCL/m00400-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl
index ba7b6f83f..b530a23d4 100644
--- a/OpenCL/m00500-optimized.cl
+++ b/OpenCL/m00500-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m00500-pure.cl b/OpenCL/m00500-pure.cl
index d30b7d1ae..cbee878dd 100644
--- a/OpenCL/m00500-pure.cl
+++ b/OpenCL/m00500-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m00600_a0-optimized.cl b/OpenCL/m00600_a0-optimized.cl
index 024dae362..e72920690 100644
--- a/OpenCL/m00600_a0-optimized.cl
+++ b/OpenCL/m00600_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00600_a1-optimized.cl b/OpenCL/m00600_a1-optimized.cl
index 9af3a5fbe..9547523ac 100644
--- a/OpenCL/m00600_a1-optimized.cl
+++ b/OpenCL/m00600_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m00600_a3-optimized.cl b/OpenCL/m00600_a3-optimized.cl
index 56cb67eb7..2545bdca5 100644
--- a/OpenCL/m00600_a3-optimized.cl
+++ b/OpenCL/m00600_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m00900_a0-optimized.cl b/OpenCL/m00900_a0-optimized.cl
index d7019c463..5f6dff580 100644
--- a/OpenCL/m00900_a0-optimized.cl
+++ b/OpenCL/m00900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m00900_a0-pure.cl b/OpenCL/m00900_a0-pure.cl
index a3d36f1f0..e8590bfd5 100644
--- a/OpenCL/m00900_a0-pure.cl
+++ b/OpenCL/m00900_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m00900_a1-optimized.cl b/OpenCL/m00900_a1-optimized.cl
index 759fe7d42..b7df87f0a 100644
--- a/OpenCL/m00900_a1-optimized.cl
+++ b/OpenCL/m00900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m00900_a1-pure.cl b/OpenCL/m00900_a1-pure.cl
index b7c4d13c7..518656787 100644
--- a/OpenCL/m00900_a1-pure.cl
+++ b/OpenCL/m00900_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m00900_a3-optimized.cl b/OpenCL/m00900_a3-optimized.cl
index a2c3b978b..11e21a483 100644
--- a/OpenCL/m00900_a3-optimized.cl
+++ b/OpenCL/m00900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m00900_a3-pure.cl b/OpenCL/m00900_a3-pure.cl
index 40f3e297c..49899c0c2 100644
--- a/OpenCL/m00900_a3-pure.cl
+++ b/OpenCL/m00900_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01000_a0-optimized.cl b/OpenCL/m01000_a0-optimized.cl
index 70127a82a..802ab947b 100644
--- a/OpenCL/m01000_a0-optimized.cl
+++ b/OpenCL/m01000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01000_a0-pure.cl b/OpenCL/m01000_a0-pure.cl
index 1dbdab60d..dee609c2c 100644
--- a/OpenCL/m01000_a0-pure.cl
+++ b/OpenCL/m01000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01000_a1-optimized.cl b/OpenCL/m01000_a1-optimized.cl
index eef1852e8..e3e51132d 100644
--- a/OpenCL/m01000_a1-optimized.cl
+++ b/OpenCL/m01000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01000_a1-pure.cl b/OpenCL/m01000_a1-pure.cl
index 71b2ab2e4..a08e6fedc 100644
--- a/OpenCL/m01000_a1-pure.cl
+++ b/OpenCL/m01000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01000_a3-optimized.cl b/OpenCL/m01000_a3-optimized.cl
index 9ede59c48..2953dca1e 100644
--- a/OpenCL/m01000_a3-optimized.cl
+++ b/OpenCL/m01000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01000_a3-pure.cl b/OpenCL/m01000_a3-pure.cl
index 91c34a4de..ff3f6a2da 100644
--- a/OpenCL/m01000_a3-pure.cl
+++ b/OpenCL/m01000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01100_a0-optimized.cl b/OpenCL/m01100_a0-optimized.cl
index f31993bf2..5963097df 100644
--- a/OpenCL/m01100_a0-optimized.cl
+++ b/OpenCL/m01100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01100_a0-pure.cl b/OpenCL/m01100_a0-pure.cl
index 311bf0a12..f7465d7f3 100644
--- a/OpenCL/m01100_a0-pure.cl
+++ b/OpenCL/m01100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01100_a1-optimized.cl b/OpenCL/m01100_a1-optimized.cl
index a63c340d1..79e758fc5 100644
--- a/OpenCL/m01100_a1-optimized.cl
+++ b/OpenCL/m01100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01100_a1-pure.cl b/OpenCL/m01100_a1-pure.cl
index d7d218ad6..ca4fad103 100644
--- a/OpenCL/m01100_a1-pure.cl
+++ b/OpenCL/m01100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01100_a3-optimized.cl b/OpenCL/m01100_a3-optimized.cl
index 57e255107..9fbbf7952 100644
--- a/OpenCL/m01100_a3-optimized.cl
+++ b/OpenCL/m01100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01100_a3-pure.cl b/OpenCL/m01100_a3-pure.cl
index 155828ab5..f55770378 100644
--- a/OpenCL/m01100_a3-pure.cl
+++ b/OpenCL/m01100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m01300_a0-optimized.cl b/OpenCL/m01300_a0-optimized.cl
index 9b19415d1..90fb84b27 100644
--- a/OpenCL/m01300_a0-optimized.cl
+++ b/OpenCL/m01300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01300_a0-pure.cl b/OpenCL/m01300_a0-pure.cl
index c837946b9..54ef39265 100644
--- a/OpenCL/m01300_a0-pure.cl
+++ b/OpenCL/m01300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01300_a1-optimized.cl b/OpenCL/m01300_a1-optimized.cl
index 090c07fe8..fedc61b6d 100644
--- a/OpenCL/m01300_a1-optimized.cl
+++ b/OpenCL/m01300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha224.cl"
diff --git a/OpenCL/m01300_a1-pure.cl b/OpenCL/m01300_a1-pure.cl
index 54f9ecbf2..3a22e5f8a 100644
--- a/OpenCL/m01300_a1-pure.cl
+++ b/OpenCL/m01300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha224.cl"
diff --git a/OpenCL/m01300_a3-optimized.cl b/OpenCL/m01300_a3-optimized.cl
index 94e7dadf1..23b8385f5 100644
--- a/OpenCL/m01300_a3-optimized.cl
+++ b/OpenCL/m01300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha224.cl"
diff --git a/OpenCL/m01300_a3-pure.cl b/OpenCL/m01300_a3-pure.cl
index c5c54972a..1fdc37fdd 100644
--- a/OpenCL/m01300_a3-pure.cl
+++ b/OpenCL/m01300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha224.cl"
diff --git a/OpenCL/m01400_a0-optimized.cl b/OpenCL/m01400_a0-optimized.cl
index 047b4765f..6b1dc826b 100644
--- a/OpenCL/m01400_a0-optimized.cl
+++ b/OpenCL/m01400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01400_a0-pure.cl b/OpenCL/m01400_a0-pure.cl
index f591ac0a6..367430536 100644
--- a/OpenCL/m01400_a0-pure.cl
+++ b/OpenCL/m01400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01400_a1-optimized.cl b/OpenCL/m01400_a1-optimized.cl
index 15cfde5c5..ca0f0d639 100644
--- a/OpenCL/m01400_a1-optimized.cl
+++ b/OpenCL/m01400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01400_a1-pure.cl b/OpenCL/m01400_a1-pure.cl
index 2f29bfeaa..4489e8a8b 100644
--- a/OpenCL/m01400_a1-pure.cl
+++ b/OpenCL/m01400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01400_a3-optimized.cl b/OpenCL/m01400_a3-optimized.cl
index e2a3694f4..48aa24f8a 100644
--- a/OpenCL/m01400_a3-optimized.cl
+++ b/OpenCL/m01400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01400_a3-pure.cl b/OpenCL/m01400_a3-pure.cl
index 50ae4b91e..86d97ba44 100644
--- a/OpenCL/m01400_a3-pure.cl
+++ b/OpenCL/m01400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01410_a0-optimized.cl b/OpenCL/m01410_a0-optimized.cl
index 6fae8211a..00e551dd5 100644
--- a/OpenCL/m01410_a0-optimized.cl
+++ b/OpenCL/m01410_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01410_a0-pure.cl b/OpenCL/m01410_a0-pure.cl
index 852574eec..bd91db24a 100644
--- a/OpenCL/m01410_a0-pure.cl
+++ b/OpenCL/m01410_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01410_a1-optimized.cl b/OpenCL/m01410_a1-optimized.cl
index 757bc6cb0..165e953b0 100644
--- a/OpenCL/m01410_a1-optimized.cl
+++ b/OpenCL/m01410_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01410_a1-pure.cl b/OpenCL/m01410_a1-pure.cl
index 4ca539c91..89772c07b 100644
--- a/OpenCL/m01410_a1-pure.cl
+++ b/OpenCL/m01410_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01410_a3-optimized.cl b/OpenCL/m01410_a3-optimized.cl
index 69b680bd9..65752388c 100644
--- a/OpenCL/m01410_a3-optimized.cl
+++ b/OpenCL/m01410_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01410_a3-pure.cl b/OpenCL/m01410_a3-pure.cl
index ec5c0fd00..a362f0f06 100644
--- a/OpenCL/m01410_a3-pure.cl
+++ b/OpenCL/m01410_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01420_a0-optimized.cl b/OpenCL/m01420_a0-optimized.cl
index 154263591..a506c1d8f 100644
--- a/OpenCL/m01420_a0-optimized.cl
+++ b/OpenCL/m01420_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01420_a0-pure.cl b/OpenCL/m01420_a0-pure.cl
index 975487ae7..eac257dde 100644
--- a/OpenCL/m01420_a0-pure.cl
+++ b/OpenCL/m01420_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01420_a1-optimized.cl b/OpenCL/m01420_a1-optimized.cl
index 43030b471..91fa89196 100644
--- a/OpenCL/m01420_a1-optimized.cl
+++ b/OpenCL/m01420_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01420_a1-pure.cl b/OpenCL/m01420_a1-pure.cl
index 6a0b7747b..bd50b619c 100644
--- a/OpenCL/m01420_a1-pure.cl
+++ b/OpenCL/m01420_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01420_a3-optimized.cl b/OpenCL/m01420_a3-optimized.cl
index 3915e0d7f..b19838862 100644
--- a/OpenCL/m01420_a3-optimized.cl
+++ b/OpenCL/m01420_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01420_a3-pure.cl b/OpenCL/m01420_a3-pure.cl
index 197729d3a..b8b7d21c0 100644
--- a/OpenCL/m01420_a3-pure.cl
+++ b/OpenCL/m01420_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01430_a0-optimized.cl b/OpenCL/m01430_a0-optimized.cl
index 0839a7e9e..61077c22e 100644
--- a/OpenCL/m01430_a0-optimized.cl
+++ b/OpenCL/m01430_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01430_a0-pure.cl b/OpenCL/m01430_a0-pure.cl
index 4ed0a82e5..af950d387 100644
--- a/OpenCL/m01430_a0-pure.cl
+++ b/OpenCL/m01430_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01430_a1-optimized.cl b/OpenCL/m01430_a1-optimized.cl
index 70e11482f..4b1c38642 100644
--- a/OpenCL/m01430_a1-optimized.cl
+++ b/OpenCL/m01430_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01430_a1-pure.cl b/OpenCL/m01430_a1-pure.cl
index 3b5db7ce1..b4597fd62 100644
--- a/OpenCL/m01430_a1-pure.cl
+++ b/OpenCL/m01430_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01430_a3-optimized.cl b/OpenCL/m01430_a3-optimized.cl
index 7122b44b2..8476d60d2 100644
--- a/OpenCL/m01430_a3-optimized.cl
+++ b/OpenCL/m01430_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01430_a3-pure.cl b/OpenCL/m01430_a3-pure.cl
index 3827a7ca7..7ed56551b 100644
--- a/OpenCL/m01430_a3-pure.cl
+++ b/OpenCL/m01430_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01440_a0-optimized.cl b/OpenCL/m01440_a0-optimized.cl
index 33383622b..23e81cee4 100644
--- a/OpenCL/m01440_a0-optimized.cl
+++ b/OpenCL/m01440_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01440_a0-pure.cl b/OpenCL/m01440_a0-pure.cl
index 1140dd0b3..d602fb4ba 100644
--- a/OpenCL/m01440_a0-pure.cl
+++ b/OpenCL/m01440_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01440_a1-optimized.cl b/OpenCL/m01440_a1-optimized.cl
index d82c59e7f..90b1f369d 100644
--- a/OpenCL/m01440_a1-optimized.cl
+++ b/OpenCL/m01440_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01440_a1-pure.cl b/OpenCL/m01440_a1-pure.cl
index 31b96ac18..7277bfa91 100644
--- a/OpenCL/m01440_a1-pure.cl
+++ b/OpenCL/m01440_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01440_a3-optimized.cl b/OpenCL/m01440_a3-optimized.cl
index f41486ab4..ce9719a32 100644
--- a/OpenCL/m01440_a3-optimized.cl
+++ b/OpenCL/m01440_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01440_a3-pure.cl b/OpenCL/m01440_a3-pure.cl
index 91208330e..3aad0e874 100644
--- a/OpenCL/m01440_a3-pure.cl
+++ b/OpenCL/m01440_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01450_a0-optimized.cl b/OpenCL/m01450_a0-optimized.cl
index fb0a46c6d..c3716f329 100644
--- a/OpenCL/m01450_a0-optimized.cl
+++ b/OpenCL/m01450_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01450_a0-pure.cl b/OpenCL/m01450_a0-pure.cl
index 6febabf09..036062aa0 100644
--- a/OpenCL/m01450_a0-pure.cl
+++ b/OpenCL/m01450_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01450_a1-optimized.cl b/OpenCL/m01450_a1-optimized.cl
index 32c27e05a..1ff4e577f 100644
--- a/OpenCL/m01450_a1-optimized.cl
+++ b/OpenCL/m01450_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01450_a1-pure.cl b/OpenCL/m01450_a1-pure.cl
index 2dc862f8e..5519e2ce7 100644
--- a/OpenCL/m01450_a1-pure.cl
+++ b/OpenCL/m01450_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01450_a3-optimized.cl b/OpenCL/m01450_a3-optimized.cl
index 20e2730c3..039ea3f5d 100644
--- a/OpenCL/m01450_a3-optimized.cl
+++ b/OpenCL/m01450_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01450_a3-pure.cl b/OpenCL/m01450_a3-pure.cl
index 7a73dfacd..2c60c55d2 100644
--- a/OpenCL/m01450_a3-pure.cl
+++ b/OpenCL/m01450_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01460_a0-optimized.cl b/OpenCL/m01460_a0-optimized.cl
index 709688c82..aa4552081 100644
--- a/OpenCL/m01460_a0-optimized.cl
+++ b/OpenCL/m01460_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01460_a0-pure.cl b/OpenCL/m01460_a0-pure.cl
index c6e2abad6..431d999ad 100644
--- a/OpenCL/m01460_a0-pure.cl
+++ b/OpenCL/m01460_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01460_a1-optimized.cl b/OpenCL/m01460_a1-optimized.cl
index 3edbecca7..6ab70a48a 100644
--- a/OpenCL/m01460_a1-optimized.cl
+++ b/OpenCL/m01460_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01460_a1-pure.cl b/OpenCL/m01460_a1-pure.cl
index b40516132..970130141 100644
--- a/OpenCL/m01460_a1-pure.cl
+++ b/OpenCL/m01460_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01460_a3-optimized.cl b/OpenCL/m01460_a3-optimized.cl
index 67121ae53..07cf8a797 100644
--- a/OpenCL/m01460_a3-optimized.cl
+++ b/OpenCL/m01460_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01460_a3-pure.cl b/OpenCL/m01460_a3-pure.cl
index 01deffbae..70b497159 100644
--- a/OpenCL/m01460_a3-pure.cl
+++ b/OpenCL/m01460_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl
index a1d94d749..beabe09f9 100644
--- a/OpenCL/m01500_a0-pure.cl
+++ b/OpenCL/m01500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl
index 81b0a22cb..3d9f06e01 100644
--- a/OpenCL/m01500_a1-pure.cl
+++ b/OpenCL/m01500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index ce3cebf22..28c9f2573 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl
index 66dbd88b7..e1110dc00 100644
--- a/OpenCL/m01600-optimized.cl
+++ b/OpenCL/m01600-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m01600-pure.cl b/OpenCL/m01600-pure.cl
index 129ff6ab2..af5972c7a 100644
--- a/OpenCL/m01600-pure.cl
+++ b/OpenCL/m01600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl
index 695c97f08..bd71b1678 100644
--- a/OpenCL/m01700_a0-optimized.cl
+++ b/OpenCL/m01700_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01700_a0-pure.cl b/OpenCL/m01700_a0-pure.cl
index d6a1f5323..0e4aba9b7 100644
--- a/OpenCL/m01700_a0-pure.cl
+++ b/OpenCL/m01700_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl
index 3b42628f8..2716dbdec 100644
--- a/OpenCL/m01700_a1-optimized.cl
+++ b/OpenCL/m01700_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01700_a1-pure.cl b/OpenCL/m01700_a1-pure.cl
index 0a5b46d6d..e3286cce8 100644
--- a/OpenCL/m01700_a1-pure.cl
+++ b/OpenCL/m01700_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl
index e56c00969..5e2820592 100644
--- a/OpenCL/m01700_a3-optimized.cl
+++ b/OpenCL/m01700_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01700_a3-pure.cl b/OpenCL/m01700_a3-pure.cl
index 19a23b77c..258aa8765 100644
--- a/OpenCL/m01700_a3-pure.cl
+++ b/OpenCL/m01700_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01710_a0-optimized.cl b/OpenCL/m01710_a0-optimized.cl
index 8bc710215..6f3130ac9 100644
--- a/OpenCL/m01710_a0-optimized.cl
+++ b/OpenCL/m01710_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01710_a0-pure.cl b/OpenCL/m01710_a0-pure.cl
index 6d40f022d..1a4f90a2b 100644
--- a/OpenCL/m01710_a0-pure.cl
+++ b/OpenCL/m01710_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl
index 97997deaf..8d8a87380 100644
--- a/OpenCL/m01710_a1-optimized.cl
+++ b/OpenCL/m01710_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01710_a1-pure.cl b/OpenCL/m01710_a1-pure.cl
index aa558a08f..ac14c0b1f 100644
--- a/OpenCL/m01710_a1-pure.cl
+++ b/OpenCL/m01710_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl
index fc88c97b9..353d1395b 100644
--- a/OpenCL/m01710_a3-optimized.cl
+++ b/OpenCL/m01710_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01710_a3-pure.cl b/OpenCL/m01710_a3-pure.cl
index cd381f8b1..f275de2fc 100644
--- a/OpenCL/m01710_a3-pure.cl
+++ b/OpenCL/m01710_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl
index d2b6624a2..14503946d 100644
--- a/OpenCL/m01720_a0-optimized.cl
+++ b/OpenCL/m01720_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01720_a0-pure.cl b/OpenCL/m01720_a0-pure.cl
index 8744ac81a..3397c31b6 100644
--- a/OpenCL/m01720_a0-pure.cl
+++ b/OpenCL/m01720_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl
index db43b2d6f..fbacd956b 100644
--- a/OpenCL/m01720_a1-optimized.cl
+++ b/OpenCL/m01720_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01720_a1-pure.cl b/OpenCL/m01720_a1-pure.cl
index 895635e2d..7ee48823b 100644
--- a/OpenCL/m01720_a1-pure.cl
+++ b/OpenCL/m01720_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl
index 205d6a567..168cb7f96 100644
--- a/OpenCL/m01720_a3-optimized.cl
+++ b/OpenCL/m01720_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01720_a3-pure.cl b/OpenCL/m01720_a3-pure.cl
index b4faaaed2..c12c16a75 100644
--- a/OpenCL/m01720_a3-pure.cl
+++ b/OpenCL/m01720_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl
index 990dc6712..2d1d4d6bc 100644
--- a/OpenCL/m01730_a0-optimized.cl
+++ b/OpenCL/m01730_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01730_a0-pure.cl b/OpenCL/m01730_a0-pure.cl
index 1f2f9946f..2e6ee476a 100644
--- a/OpenCL/m01730_a0-pure.cl
+++ b/OpenCL/m01730_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl
index 51e673e37..7515d2cc0 100644
--- a/OpenCL/m01730_a1-optimized.cl
+++ b/OpenCL/m01730_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01730_a1-pure.cl b/OpenCL/m01730_a1-pure.cl
index e1dd5629d..105807da5 100644
--- a/OpenCL/m01730_a1-pure.cl
+++ b/OpenCL/m01730_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl
index cc9a73baa..d2f01afd8 100644
--- a/OpenCL/m01730_a3-optimized.cl
+++ b/OpenCL/m01730_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01730_a3-pure.cl b/OpenCL/m01730_a3-pure.cl
index 119a19f31..f319d97b4 100644
--- a/OpenCL/m01730_a3-pure.cl
+++ b/OpenCL/m01730_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl
index 4e046b948..717aba0c0 100644
--- a/OpenCL/m01740_a0-optimized.cl
+++ b/OpenCL/m01740_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01740_a0-pure.cl b/OpenCL/m01740_a0-pure.cl
index 90626bed5..526a0d021 100644
--- a/OpenCL/m01740_a0-pure.cl
+++ b/OpenCL/m01740_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl
index 6c9306d76..aa765e337 100644
--- a/OpenCL/m01740_a1-optimized.cl
+++ b/OpenCL/m01740_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01740_a1-pure.cl b/OpenCL/m01740_a1-pure.cl
index 2c1f2fd91..dca49789a 100644
--- a/OpenCL/m01740_a1-pure.cl
+++ b/OpenCL/m01740_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl
index 077830ed1..8b6322f85 100644
--- a/OpenCL/m01740_a3-optimized.cl
+++ b/OpenCL/m01740_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01740_a3-pure.cl b/OpenCL/m01740_a3-pure.cl
index 298ff8eb7..f188a6108 100644
--- a/OpenCL/m01740_a3-pure.cl
+++ b/OpenCL/m01740_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01750_a0-optimized.cl b/OpenCL/m01750_a0-optimized.cl
index b8cdee6e8..77115c6cb 100644
--- a/OpenCL/m01750_a0-optimized.cl
+++ b/OpenCL/m01750_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01750_a0-pure.cl b/OpenCL/m01750_a0-pure.cl
index adf938244..f41391d60 100644
--- a/OpenCL/m01750_a0-pure.cl
+++ b/OpenCL/m01750_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01750_a1-optimized.cl b/OpenCL/m01750_a1-optimized.cl
index 56693782b..c7bbc3c22 100644
--- a/OpenCL/m01750_a1-optimized.cl
+++ b/OpenCL/m01750_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01750_a1-pure.cl b/OpenCL/m01750_a1-pure.cl
index ebc65973f..fad3ad3aa 100644
--- a/OpenCL/m01750_a1-pure.cl
+++ b/OpenCL/m01750_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01750_a3-optimized.cl b/OpenCL/m01750_a3-optimized.cl
index 05f3a62f7..29e30c3fb 100644
--- a/OpenCL/m01750_a3-optimized.cl
+++ b/OpenCL/m01750_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01750_a3-pure.cl b/OpenCL/m01750_a3-pure.cl
index 1a2c82e24..3c5c79c1f 100644
--- a/OpenCL/m01750_a3-pure.cl
+++ b/OpenCL/m01750_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01760_a0-optimized.cl b/OpenCL/m01760_a0-optimized.cl
index 2a8fbca09..a88156593 100644
--- a/OpenCL/m01760_a0-optimized.cl
+++ b/OpenCL/m01760_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m01760_a0-pure.cl b/OpenCL/m01760_a0-pure.cl
index 149526465..b340a1ec9 100644
--- a/OpenCL/m01760_a0-pure.cl
+++ b/OpenCL/m01760_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m01760_a1-optimized.cl b/OpenCL/m01760_a1-optimized.cl
index 9285fd6d4..d06f82987 100644
--- a/OpenCL/m01760_a1-optimized.cl
+++ b/OpenCL/m01760_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01760_a1-pure.cl b/OpenCL/m01760_a1-pure.cl
index 81fe1aa1a..485606097 100644
--- a/OpenCL/m01760_a1-pure.cl
+++ b/OpenCL/m01760_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01760_a3-optimized.cl b/OpenCL/m01760_a3-optimized.cl
index ca5ecc77c..4ce0ea423 100644
--- a/OpenCL/m01760_a3-optimized.cl
+++ b/OpenCL/m01760_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01760_a3-pure.cl b/OpenCL/m01760_a3-pure.cl
index 1110e21da..3a59341b4 100644
--- a/OpenCL/m01760_a3-pure.cl
+++ b/OpenCL/m01760_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m01800-optimized.cl b/OpenCL/m01800-optimized.cl
index f9ec2cc02..03c55bef6 100644
--- a/OpenCL/m01800-optimized.cl
+++ b/OpenCL/m01800-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha512.cl"
 #endif
diff --git a/OpenCL/m01800-pure.cl b/OpenCL/m01800-pure.cl
index 64a705d8e..f57c22243 100644
--- a/OpenCL/m01800-pure.cl
+++ b/OpenCL/m01800-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha512.cl"
 #endif
diff --git a/OpenCL/m02000_a0-pure.cl b/OpenCL/m02000_a0-pure.cl
index fe5c937a3..d0d9258e5 100644
--- a/OpenCL/m02000_a0-pure.cl
+++ b/OpenCL/m02000_a0-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m02000_a1-pure.cl b/OpenCL/m02000_a1-pure.cl
index fe5c937a3..d0d9258e5 100644
--- a/OpenCL/m02000_a1-pure.cl
+++ b/OpenCL/m02000_a1-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m02000_a3-pure.cl b/OpenCL/m02000_a3-pure.cl
index fe5c937a3..d0d9258e5 100644
--- a/OpenCL/m02000_a3-pure.cl
+++ b/OpenCL/m02000_a3-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m02100-pure.cl b/OpenCL/m02100-pure.cl
index cde6b322f..0d9e0e756 100644
--- a/OpenCL/m02100-pure.cl
+++ b/OpenCL/m02100-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m02400_a0-optimized.cl b/OpenCL/m02400_a0-optimized.cl
index 471e7ada1..b030782b7 100644
--- a/OpenCL/m02400_a0-optimized.cl
+++ b/OpenCL/m02400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m02400_a1-optimized.cl b/OpenCL/m02400_a1-optimized.cl
index 286efdaba..013184be7 100644
--- a/OpenCL/m02400_a1-optimized.cl
+++ b/OpenCL/m02400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02400_a3-optimized.cl b/OpenCL/m02400_a3-optimized.cl
index 66fa42907..2db8f1e56 100644
--- a/OpenCL/m02400_a3-optimized.cl
+++ b/OpenCL/m02400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02410_a0-optimized.cl b/OpenCL/m02410_a0-optimized.cl
index e5f277680..d6254dcc0 100644
--- a/OpenCL/m02410_a0-optimized.cl
+++ b/OpenCL/m02410_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m02410_a1-optimized.cl b/OpenCL/m02410_a1-optimized.cl
index 753461bef..594aaaf8b 100644
--- a/OpenCL/m02410_a1-optimized.cl
+++ b/OpenCL/m02410_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02410_a3-optimized.cl b/OpenCL/m02410_a3-optimized.cl
index 76b7eb086..cfa3ae2db 100644
--- a/OpenCL/m02410_a3-optimized.cl
+++ b/OpenCL/m02410_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index bb46357f5..13803488b 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
@@ -17,6 +18,7 @@
 #else
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_simd.h"
 #include "inc_hash_md5.h"
diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl
index f51e8f2db..7572cf5d8 100644
--- a/OpenCL/m02501-pure.cl
+++ b/OpenCL/m02501-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
@@ -17,6 +18,7 @@
 #else
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_simd.h"
 #include "inc_hash_md5.h"
diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl
index 6744a5026..9d183a451 100644
--- a/OpenCL/m02610_a0-optimized.cl
+++ b/OpenCL/m02610_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl
index 092557282..425a8ff3b 100644
--- a/OpenCL/m02610_a0-pure.cl
+++ b/OpenCL/m02610_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl
index bca78bc86..869e9b0ef 100644
--- a/OpenCL/m02610_a1-optimized.cl
+++ b/OpenCL/m02610_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl
index ba64a81e5..406379cce 100644
--- a/OpenCL/m02610_a1-pure.cl
+++ b/OpenCL/m02610_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl
index 482e34094..ff92796b0 100644
--- a/OpenCL/m02610_a3-optimized.cl
+++ b/OpenCL/m02610_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl
index 6f992a8a5..5e5aeeefe 100644
--- a/OpenCL/m02610_a3-pure.cl
+++ b/OpenCL/m02610_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl
index 74c7190f8..00351a75d 100644
--- a/OpenCL/m02710_a0-optimized.cl
+++ b/OpenCL/m02710_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl
index 45595e2d6..84b8d5bbf 100644
--- a/OpenCL/m02710_a1-optimized.cl
+++ b/OpenCL/m02710_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl
index c37eb27e9..fd3174776 100644
--- a/OpenCL/m02710_a3-optimized.cl
+++ b/OpenCL/m02710_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl
index 3c5094284..73ea2bc99 100644
--- a/OpenCL/m02810_a0-optimized.cl
+++ b/OpenCL/m02810_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl
index c101ba360..c4b4e1130 100644
--- a/OpenCL/m02810_a0-pure.cl
+++ b/OpenCL/m02810_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl
index d173d4832..027ad454e 100644
--- a/OpenCL/m02810_a1-optimized.cl
+++ b/OpenCL/m02810_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl
index 31cb8de8c..1d15f049d 100644
--- a/OpenCL/m02810_a1-pure.cl
+++ b/OpenCL/m02810_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl
index 0fb3dd780..7d3067012 100644
--- a/OpenCL/m02810_a3-optimized.cl
+++ b/OpenCL/m02810_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl
index 903954989..63c6e2aef 100644
--- a/OpenCL/m02810_a3-pure.cl
+++ b/OpenCL/m02810_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl
index c35d938de..96ef208e2 100644
--- a/OpenCL/m03000_a0-pure.cl
+++ b/OpenCL/m03000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl
index ffadf5214..d0e7f6b9f 100644
--- a/OpenCL/m03000_a1-pure.cl
+++ b/OpenCL/m03000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl
index 81d25664a..8af1fad35 100644
--- a/OpenCL/m03000_a3-pure.cl
+++ b/OpenCL/m03000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m03100_a0-optimized.cl b/OpenCL/m03100_a0-optimized.cl
index d37a1db37..25104232d 100644
--- a/OpenCL/m03100_a0-optimized.cl
+++ b/OpenCL/m03100_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m03100_a1-optimized.cl b/OpenCL/m03100_a1-optimized.cl
index 601c1c62b..a4913d622 100644
--- a/OpenCL/m03100_a1-optimized.cl
+++ b/OpenCL/m03100_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_cipher_des.cl"
diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl
index fb8618271..a6f687f61 100644
--- a/OpenCL/m03100_a3-optimized.cl
+++ b/OpenCL/m03100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_cipher_des.cl"
diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl
index 332cca11c..4b7c6c79d 100644
--- a/OpenCL/m03200-pure.cl
+++ b/OpenCL/m03200-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl
index 1956e8eaf..43c7ef379 100644
--- a/OpenCL/m03710_a0-optimized.cl
+++ b/OpenCL/m03710_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl
index a5ac06721..109bb82a4 100644
--- a/OpenCL/m03710_a0-pure.cl
+++ b/OpenCL/m03710_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl
index 885e12019..90b0b86a9 100644
--- a/OpenCL/m03710_a1-optimized.cl
+++ b/OpenCL/m03710_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl
index 48804bdc0..15d808240 100644
--- a/OpenCL/m03710_a1-pure.cl
+++ b/OpenCL/m03710_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl
index 6a92fe84a..d5ffd7a6c 100644
--- a/OpenCL/m03710_a3-optimized.cl
+++ b/OpenCL/m03710_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl
index 0583ba6e0..56f90d1b9 100644
--- a/OpenCL/m03710_a3-pure.cl
+++ b/OpenCL/m03710_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03800_a0-optimized.cl b/OpenCL/m03800_a0-optimized.cl
index b14fd1569..12cfd7139 100644
--- a/OpenCL/m03800_a0-optimized.cl
+++ b/OpenCL/m03800_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m03800_a0-pure.cl b/OpenCL/m03800_a0-pure.cl
index 4bcc387b9..3fa962f59 100644
--- a/OpenCL/m03800_a0-pure.cl
+++ b/OpenCL/m03800_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m03800_a1-optimized.cl b/OpenCL/m03800_a1-optimized.cl
index 274f3e35d..28d317f5f 100644
--- a/OpenCL/m03800_a1-optimized.cl
+++ b/OpenCL/m03800_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03800_a1-pure.cl b/OpenCL/m03800_a1-pure.cl
index e0b3fbf2d..ae1dd719b 100644
--- a/OpenCL/m03800_a1-pure.cl
+++ b/OpenCL/m03800_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03800_a3-optimized.cl b/OpenCL/m03800_a3-optimized.cl
index 5fbcffba0..e849f703e 100644
--- a/OpenCL/m03800_a3-optimized.cl
+++ b/OpenCL/m03800_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03800_a3-pure.cl b/OpenCL/m03800_a3-pure.cl
index 4f8bf236b..e8189ccea 100644
--- a/OpenCL/m03800_a3-pure.cl
+++ b/OpenCL/m03800_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl
index 38586a366..7c7d0413d 100644
--- a/OpenCL/m03910_a0-optimized.cl
+++ b/OpenCL/m03910_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl
index 79fdb2265..aa1d57b1b 100644
--- a/OpenCL/m03910_a0-pure.cl
+++ b/OpenCL/m03910_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl
index c4b2f5cb2..2ef98cd37 100644
--- a/OpenCL/m03910_a1-optimized.cl
+++ b/OpenCL/m03910_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl
index a27aa1fa4..baeb1a49e 100644
--- a/OpenCL/m03910_a1-pure.cl
+++ b/OpenCL/m03910_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl
index 3134cd506..faf3eef4e 100644
--- a/OpenCL/m03910_a3-optimized.cl
+++ b/OpenCL/m03910_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl
index 73698ba4e..caadebe9c 100644
--- a/OpenCL/m03910_a3-pure.cl
+++ b/OpenCL/m03910_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl
index 8f675941a..43aa224d6 100644
--- a/OpenCL/m04010_a0-optimized.cl
+++ b/OpenCL/m04010_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl
index 6557df376..3a61a7461 100644
--- a/OpenCL/m04010_a0-pure.cl
+++ b/OpenCL/m04010_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl
index 22f5e9830..db8887b8c 100644
--- a/OpenCL/m04010_a1-optimized.cl
+++ b/OpenCL/m04010_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl
index 935273eb0..4f25cb1c6 100644
--- a/OpenCL/m04010_a1-pure.cl
+++ b/OpenCL/m04010_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl
index 4d69d9c25..7be89bdf2 100644
--- a/OpenCL/m04010_a3-optimized.cl
+++ b/OpenCL/m04010_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl
index 3bc48a738..2c8c112c0 100644
--- a/OpenCL/m04010_a3-pure.cl
+++ b/OpenCL/m04010_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl
index 972d0113f..b47546c9a 100644
--- a/OpenCL/m04110_a0-optimized.cl
+++ b/OpenCL/m04110_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl
index 2426e5a16..8947b7726 100644
--- a/OpenCL/m04110_a0-pure.cl
+++ b/OpenCL/m04110_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl
index af3cb9274..01cd6749e 100644
--- a/OpenCL/m04110_a1-optimized.cl
+++ b/OpenCL/m04110_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl
index dd230f3fe..60de2e1e9 100644
--- a/OpenCL/m04110_a1-pure.cl
+++ b/OpenCL/m04110_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl
index 3521ecb6d..3c68d886b 100644
--- a/OpenCL/m04110_a3-optimized.cl
+++ b/OpenCL/m04110_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl
index e7c4bfbd0..705a56903 100644
--- a/OpenCL/m04110_a3-pure.cl
+++ b/OpenCL/m04110_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl
index 33bf2d335..eb14690b8 100644
--- a/OpenCL/m04310_a0-optimized.cl
+++ b/OpenCL/m04310_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl
index cec409c1d..fe94dfc83 100644
--- a/OpenCL/m04310_a0-pure.cl
+++ b/OpenCL/m04310_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl
index bbd0c9508..c9bad2483 100644
--- a/OpenCL/m04310_a1-optimized.cl
+++ b/OpenCL/m04310_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl
index b0acaf470..b8fe23108 100644
--- a/OpenCL/m04310_a1-pure.cl
+++ b/OpenCL/m04310_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl
index 6ecf6b1b3..7fc003101 100644
--- a/OpenCL/m04310_a3-optimized.cl
+++ b/OpenCL/m04310_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl
index d40944557..171b2c870 100644
--- a/OpenCL/m04310_a3-pure.cl
+++ b/OpenCL/m04310_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl
index 05e04b25a..250cde03e 100644
--- a/OpenCL/m04400_a0-optimized.cl
+++ b/OpenCL/m04400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04400_a0-pure.cl b/OpenCL/m04400_a0-pure.cl
index dc63016e7..ebf3b90f5 100644
--- a/OpenCL/m04400_a0-pure.cl
+++ b/OpenCL/m04400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl
index 698ef9d86..c577e2f1e 100644
--- a/OpenCL/m04400_a1-optimized.cl
+++ b/OpenCL/m04400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl
index 73df68e7b..3841a1944 100644
--- a/OpenCL/m04400_a1-pure.cl
+++ b/OpenCL/m04400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl
index 220db208f..44d0c22ee 100644
--- a/OpenCL/m04400_a3-optimized.cl
+++ b/OpenCL/m04400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl
index 5cdee2e1a..a4d922d68 100644
--- a/OpenCL/m04400_a3-pure.cl
+++ b/OpenCL/m04400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl
index aa6785d5c..d6a9cf83b 100644
--- a/OpenCL/m04500_a0-optimized.cl
+++ b/OpenCL/m04500_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl
index b8a539961..a63fd8320 100644
--- a/OpenCL/m04500_a0-pure.cl
+++ b/OpenCL/m04500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl
index e9e0c42f9..088bf10ef 100644
--- a/OpenCL/m04500_a1-optimized.cl
+++ b/OpenCL/m04500_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl
index eead59928..9ffd4672a 100644
--- a/OpenCL/m04500_a1-pure.cl
+++ b/OpenCL/m04500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl
index a0f9f632f..a96781501 100644
--- a/OpenCL/m04500_a3-optimized.cl
+++ b/OpenCL/m04500_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl
index 82173d7a4..cd3a15aca 100644
--- a/OpenCL/m04500_a3-pure.cl
+++ b/OpenCL/m04500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl
index 237702cc0..da1d19413 100644
--- a/OpenCL/m04520_a0-optimized.cl
+++ b/OpenCL/m04520_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl
index 5cd66b395..2879d8278 100644
--- a/OpenCL/m04520_a0-pure.cl
+++ b/OpenCL/m04520_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl
index 2c6cf6a41..36e773413 100644
--- a/OpenCL/m04520_a1-optimized.cl
+++ b/OpenCL/m04520_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl
index 4a34801fc..4d3d47132 100644
--- a/OpenCL/m04520_a1-pure.cl
+++ b/OpenCL/m04520_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl
index 52e229952..ddf48573d 100644
--- a/OpenCL/m04520_a3-optimized.cl
+++ b/OpenCL/m04520_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl
index 8ced12d82..8a5a0839c 100644
--- a/OpenCL/m04520_a3-pure.cl
+++ b/OpenCL/m04520_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl
index 19ffea0a4..bdb3ec553 100644
--- a/OpenCL/m04700_a0-optimized.cl
+++ b/OpenCL/m04700_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl
index 3645fa853..fa46ddc1a 100644
--- a/OpenCL/m04700_a0-pure.cl
+++ b/OpenCL/m04700_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl
index 982e57043..802f4aebd 100644
--- a/OpenCL/m04700_a1-optimized.cl
+++ b/OpenCL/m04700_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl
index ddc4286ed..04d17f749 100644
--- a/OpenCL/m04700_a1-pure.cl
+++ b/OpenCL/m04700_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl
index e2c1ab949..e103191d5 100644
--- a/OpenCL/m04700_a3-optimized.cl
+++ b/OpenCL/m04700_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl
index 26a7524be..81f473b37 100644
--- a/OpenCL/m04700_a3-pure.cl
+++ b/OpenCL/m04700_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04800_a0-optimized.cl b/OpenCL/m04800_a0-optimized.cl
index 03418d5d4..c58d89a00 100644
--- a/OpenCL/m04800_a0-optimized.cl
+++ b/OpenCL/m04800_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04800_a0-pure.cl b/OpenCL/m04800_a0-pure.cl
index f3c7646f0..54c0bf973 100644
--- a/OpenCL/m04800_a0-pure.cl
+++ b/OpenCL/m04800_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04800_a1-optimized.cl b/OpenCL/m04800_a1-optimized.cl
index b31486ccc..3384bcc4e 100644
--- a/OpenCL/m04800_a1-optimized.cl
+++ b/OpenCL/m04800_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04800_a1-pure.cl b/OpenCL/m04800_a1-pure.cl
index 75f493f7a..2a0de51c9 100644
--- a/OpenCL/m04800_a1-pure.cl
+++ b/OpenCL/m04800_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04800_a3-optimized.cl b/OpenCL/m04800_a3-optimized.cl
index 5da3ee496..98343c1dd 100644
--- a/OpenCL/m04800_a3-optimized.cl
+++ b/OpenCL/m04800_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04800_a3-pure.cl b/OpenCL/m04800_a3-pure.cl
index 43284c7af..a7a3fcf7b 100644
--- a/OpenCL/m04800_a3-pure.cl
+++ b/OpenCL/m04800_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m04900_a0-optimized.cl b/OpenCL/m04900_a0-optimized.cl
index 601b5b1d9..d1e686ac5 100644
--- a/OpenCL/m04900_a0-optimized.cl
+++ b/OpenCL/m04900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m04900_a0-pure.cl b/OpenCL/m04900_a0-pure.cl
index ef31db611..cab847cbf 100644
--- a/OpenCL/m04900_a0-pure.cl
+++ b/OpenCL/m04900_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m04900_a1-optimized.cl b/OpenCL/m04900_a1-optimized.cl
index e5be3e814..d51a5f407 100644
--- a/OpenCL/m04900_a1-optimized.cl
+++ b/OpenCL/m04900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04900_a1-pure.cl b/OpenCL/m04900_a1-pure.cl
index acd27d749..357ba67bc 100644
--- a/OpenCL/m04900_a1-pure.cl
+++ b/OpenCL/m04900_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04900_a3-optimized.cl b/OpenCL/m04900_a3-optimized.cl
index e3aa883cc..199e11ca4 100644
--- a/OpenCL/m04900_a3-optimized.cl
+++ b/OpenCL/m04900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m04900_a3-pure.cl b/OpenCL/m04900_a3-pure.cl
index 6d6b02b9f..c9d731d86 100644
--- a/OpenCL/m04900_a3-pure.cl
+++ b/OpenCL/m04900_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m05100_a0-optimized.cl b/OpenCL/m05100_a0-optimized.cl
index 90441fc0a..4790fef9b 100644
--- a/OpenCL/m05100_a0-optimized.cl
+++ b/OpenCL/m05100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m05100_a0-pure.cl b/OpenCL/m05100_a0-pure.cl
index f71d763b9..41f3ce539 100644
--- a/OpenCL/m05100_a0-pure.cl
+++ b/OpenCL/m05100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m05100_a1-optimized.cl b/OpenCL/m05100_a1-optimized.cl
index efa881b95..d3f20fdd0 100644
--- a/OpenCL/m05100_a1-optimized.cl
+++ b/OpenCL/m05100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05100_a1-pure.cl b/OpenCL/m05100_a1-pure.cl
index 3e8e8c153..861e93519 100644
--- a/OpenCL/m05100_a1-pure.cl
+++ b/OpenCL/m05100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05100_a3-optimized.cl b/OpenCL/m05100_a3-optimized.cl
index f4be1c794..2c897959b 100644
--- a/OpenCL/m05100_a3-optimized.cl
+++ b/OpenCL/m05100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05100_a3-pure.cl b/OpenCL/m05100_a3-pure.cl
index 6dcea48d1..02393f913 100644
--- a/OpenCL/m05100_a3-pure.cl
+++ b/OpenCL/m05100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05200-pure.cl b/OpenCL/m05200-pure.cl
index 8edf41a04..194b0fa02 100644
--- a/OpenCL/m05200-pure.cl
+++ b/OpenCL/m05200-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m05300_a0-optimized.cl b/OpenCL/m05300_a0-optimized.cl
index f20d0d097..5ec319e24 100644
--- a/OpenCL/m05300_a0-optimized.cl
+++ b/OpenCL/m05300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m05300_a0-pure.cl b/OpenCL/m05300_a0-pure.cl
index 8337daff1..53d48ecaa 100644
--- a/OpenCL/m05300_a0-pure.cl
+++ b/OpenCL/m05300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m05300_a1-optimized.cl b/OpenCL/m05300_a1-optimized.cl
index fc2f75c04..c4c8c955c 100644
--- a/OpenCL/m05300_a1-optimized.cl
+++ b/OpenCL/m05300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05300_a1-pure.cl b/OpenCL/m05300_a1-pure.cl
index 8aa4b80d3..39b917d91 100644
--- a/OpenCL/m05300_a1-pure.cl
+++ b/OpenCL/m05300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl
index 1f961d197..b4b8d2173 100644
--- a/OpenCL/m05300_a3-optimized.cl
+++ b/OpenCL/m05300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05300_a3-pure.cl b/OpenCL/m05300_a3-pure.cl
index c89b6ab10..76d6c0bc9 100644
--- a/OpenCL/m05300_a3-pure.cl
+++ b/OpenCL/m05300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m05400_a0-optimized.cl b/OpenCL/m05400_a0-optimized.cl
index c2332db9e..1201d0155 100644
--- a/OpenCL/m05400_a0-optimized.cl
+++ b/OpenCL/m05400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m05400_a0-pure.cl b/OpenCL/m05400_a0-pure.cl
index de3dee4bf..f909c3846 100644
--- a/OpenCL/m05400_a0-pure.cl
+++ b/OpenCL/m05400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m05400_a1-optimized.cl b/OpenCL/m05400_a1-optimized.cl
index 9ca6763e1..f90f918f2 100644
--- a/OpenCL/m05400_a1-optimized.cl
+++ b/OpenCL/m05400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m05400_a1-pure.cl b/OpenCL/m05400_a1-pure.cl
index abb5cfce3..f2d054bc6 100644
--- a/OpenCL/m05400_a1-pure.cl
+++ b/OpenCL/m05400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m05400_a3-optimized.cl b/OpenCL/m05400_a3-optimized.cl
index 6a1b47117..898fc162c 100644
--- a/OpenCL/m05400_a3-optimized.cl
+++ b/OpenCL/m05400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m05400_a3-pure.cl b/OpenCL/m05400_a3-pure.cl
index ed16f2ca9..8e4c63305 100644
--- a/OpenCL/m05400_a3-pure.cl
+++ b/OpenCL/m05400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl
index 7a3a58037..f9fd1e11a 100644
--- a/OpenCL/m05500_a0-optimized.cl
+++ b/OpenCL/m05500_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl
index f13503698..aaef0fbf9 100644
--- a/OpenCL/m05500_a0-pure.cl
+++ b/OpenCL/m05500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl
index bd80ae13e..a1f12ff86 100644
--- a/OpenCL/m05500_a1-optimized.cl
+++ b/OpenCL/m05500_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl
index 577117f3a..3e482266c 100644
--- a/OpenCL/m05500_a1-pure.cl
+++ b/OpenCL/m05500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl
index b1a34c2c0..ec25450db 100644
--- a/OpenCL/m05500_a3-optimized.cl
+++ b/OpenCL/m05500_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl
index 698003a40..eca392158 100644
--- a/OpenCL/m05500_a3-pure.cl
+++ b/OpenCL/m05500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05600_a0-optimized.cl b/OpenCL/m05600_a0-optimized.cl
index 1ee105135..ec3260e7a 100644
--- a/OpenCL/m05600_a0-optimized.cl
+++ b/OpenCL/m05600_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m05600_a0-pure.cl b/OpenCL/m05600_a0-pure.cl
index 699e5e2df..25d56067f 100644
--- a/OpenCL/m05600_a0-pure.cl
+++ b/OpenCL/m05600_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m05600_a1-optimized.cl b/OpenCL/m05600_a1-optimized.cl
index c8d7bdf11..d208a87d5 100644
--- a/OpenCL/m05600_a1-optimized.cl
+++ b/OpenCL/m05600_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05600_a1-pure.cl b/OpenCL/m05600_a1-pure.cl
index aefa8323c..91ea91a53 100644
--- a/OpenCL/m05600_a1-pure.cl
+++ b/OpenCL/m05600_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl
index 646e07455..f9204f9f0 100644
--- a/OpenCL/m05600_a3-optimized.cl
+++ b/OpenCL/m05600_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05600_a3-pure.cl b/OpenCL/m05600_a3-pure.cl
index d5d48f5b7..7f102a56f 100644
--- a/OpenCL/m05600_a3-pure.cl
+++ b/OpenCL/m05600_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index bedf6625e..658a9f93e 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #endif
diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl
index 465e47176..e7cddac46 100644
--- a/OpenCL/m05800-pure.cl
+++ b/OpenCL/m05800-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #endif
diff --git a/OpenCL/m06000_a0-optimized.cl b/OpenCL/m06000_a0-optimized.cl
index e38bfd5a6..87ef31990 100644
--- a/OpenCL/m06000_a0-optimized.cl
+++ b/OpenCL/m06000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m06000_a0-pure.cl b/OpenCL/m06000_a0-pure.cl
index 0573efd7d..d25b558ae 100644
--- a/OpenCL/m06000_a0-pure.cl
+++ b/OpenCL/m06000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m06000_a1-optimized.cl b/OpenCL/m06000_a1-optimized.cl
index 640132315..05934117d 100644
--- a/OpenCL/m06000_a1-optimized.cl
+++ b/OpenCL/m06000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06000_a1-pure.cl b/OpenCL/m06000_a1-pure.cl
index fbbea68ad..1eda2781d 100644
--- a/OpenCL/m06000_a1-pure.cl
+++ b/OpenCL/m06000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06000_a3-optimized.cl b/OpenCL/m06000_a3-optimized.cl
index 966fcf15f..fab740436 100644
--- a/OpenCL/m06000_a3-optimized.cl
+++ b/OpenCL/m06000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06000_a3-pure.cl b/OpenCL/m06000_a3-pure.cl
index d7bbe5a02..3fbbaca59 100644
--- a/OpenCL/m06000_a3-pure.cl
+++ b/OpenCL/m06000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl
index 00243058c..3c6dd15f7 100644
--- a/OpenCL/m06100_a0-optimized.cl
+++ b/OpenCL/m06100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl
index 4482625ca..694081d4a 100644
--- a/OpenCL/m06100_a0-pure.cl
+++ b/OpenCL/m06100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl
index e6cb58a41..78435f227 100644
--- a/OpenCL/m06100_a1-optimized.cl
+++ b/OpenCL/m06100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl
index aea3847fb..7eaa1eead 100644
--- a/OpenCL/m06100_a1-pure.cl
+++ b/OpenCL/m06100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl
index b9e10a98f..cd97b7644 100644
--- a/OpenCL/m06100_a3-optimized.cl
+++ b/OpenCL/m06100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl
index 2f5bedfb1..ab5a989ff 100644
--- a/OpenCL/m06100_a3-pure.cl
+++ b/OpenCL/m06100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl
index 7e84978c8..4f3b884ac 100644
--- a/OpenCL/m06211-pure.cl
+++ b/OpenCL/m06211-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl
index a60a315ee..d002aaa11 100644
--- a/OpenCL/m06212-pure.cl
+++ b/OpenCL/m06212-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl
index 45b304de7..48dc1f6c3 100644
--- a/OpenCL/m06213-pure.cl
+++ b/OpenCL/m06213-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl
index 0e8dc1e5f..8860e0d5c 100644
--- a/OpenCL/m06221-pure.cl
+++ b/OpenCL/m06221-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl
index 2af5c856f..508da0bbe 100644
--- a/OpenCL/m06222-pure.cl
+++ b/OpenCL/m06222-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl
index 3721447bc..75ff65068 100644
--- a/OpenCL/m06223-pure.cl
+++ b/OpenCL/m06223-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl
index 57d84f57c..15e94c0e0 100644
--- a/OpenCL/m06231-pure.cl
+++ b/OpenCL/m06231-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl
index b369aa7cb..378f46ffd 100644
--- a/OpenCL/m06232-pure.cl
+++ b/OpenCL/m06232-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl
index a1b94ae89..9e1e31ad6 100644
--- a/OpenCL/m06233-pure.cl
+++ b/OpenCL/m06233-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl
index fb1169aea..fe57526b5 100644
--- a/OpenCL/m06300-optimized.cl
+++ b/OpenCL/m06300-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m06300-pure.cl b/OpenCL/m06300-pure.cl
index 360f2e4e9..ec654ab6d 100644
--- a/OpenCL/m06300-pure.cl
+++ b/OpenCL/m06300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m06400-pure.cl b/OpenCL/m06400-pure.cl
index 0b7ba781e..82d30025c 100644
--- a/OpenCL/m06400-pure.cl
+++ b/OpenCL/m06400-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m06500-pure.cl b/OpenCL/m06500-pure.cl
index 4a4a5b449..73a154a64 100644
--- a/OpenCL/m06500-pure.cl
+++ b/OpenCL/m06500-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m06600-pure.cl b/OpenCL/m06600-pure.cl
index 245877c4e..409087e2f 100644
--- a/OpenCL/m06600-pure.cl
+++ b/OpenCL/m06600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m06700-pure.cl b/OpenCL/m06700-pure.cl
index 9c9edafc2..df9d16f1e 100644
--- a/OpenCL/m06700-pure.cl
+++ b/OpenCL/m06700-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl
index 5b78bfc50..daa69e2ff 100644
--- a/OpenCL/m06800-pure.cl
+++ b/OpenCL/m06800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl
index 3274d0665..07939b450 100644
--- a/OpenCL/m06900_a0-optimized.cl
+++ b/OpenCL/m06900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl
index 156b85b2f..7cf40f6cf 100644
--- a/OpenCL/m06900_a1-optimized.cl
+++ b/OpenCL/m06900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl
index d68d73cc8..abfcda648 100644
--- a/OpenCL/m06900_a3-optimized.cl
+++ b/OpenCL/m06900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m07000_a0-optimized.cl b/OpenCL/m07000_a0-optimized.cl
index f5d00ff09..b2f6f66a9 100644
--- a/OpenCL/m07000_a0-optimized.cl
+++ b/OpenCL/m07000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07000_a0-pure.cl b/OpenCL/m07000_a0-pure.cl
index 05c507291..f3bb57959 100644
--- a/OpenCL/m07000_a0-pure.cl
+++ b/OpenCL/m07000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m07000_a1-optimized.cl b/OpenCL/m07000_a1-optimized.cl
index 943fcc6d1..a5c914433 100644
--- a/OpenCL/m07000_a1-optimized.cl
+++ b/OpenCL/m07000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07000_a1-pure.cl b/OpenCL/m07000_a1-pure.cl
index 4e623ad2d..f6f43d72c 100644
--- a/OpenCL/m07000_a1-pure.cl
+++ b/OpenCL/m07000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07000_a3-optimized.cl b/OpenCL/m07000_a3-optimized.cl
index f3db19268..5757962da 100644
--- a/OpenCL/m07000_a3-optimized.cl
+++ b/OpenCL/m07000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07000_a3-pure.cl b/OpenCL/m07000_a3-pure.cl
index 52852d473..db6f00a5f 100644
--- a/OpenCL/m07000_a3-pure.cl
+++ b/OpenCL/m07000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07100-pure.cl b/OpenCL/m07100-pure.cl
index 9426ceb2f..2cee7e0b0 100644
--- a/OpenCL/m07100-pure.cl
+++ b/OpenCL/m07100-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m07300_a0-optimized.cl b/OpenCL/m07300_a0-optimized.cl
index c1744096c..35f4a3639 100644
--- a/OpenCL/m07300_a0-optimized.cl
+++ b/OpenCL/m07300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07300_a0-pure.cl b/OpenCL/m07300_a0-pure.cl
index 879fbaa5b..582e6e7c8 100644
--- a/OpenCL/m07300_a0-pure.cl
+++ b/OpenCL/m07300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m07300_a1-optimized.cl b/OpenCL/m07300_a1-optimized.cl
index cf142f29a..83fce7cf9 100644
--- a/OpenCL/m07300_a1-optimized.cl
+++ b/OpenCL/m07300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07300_a1-pure.cl b/OpenCL/m07300_a1-pure.cl
index 26bf80185..8771d4d7a 100644
--- a/OpenCL/m07300_a1-pure.cl
+++ b/OpenCL/m07300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07300_a3-optimized.cl b/OpenCL/m07300_a3-optimized.cl
index 7b9e7a78d..b3b74c963 100644
--- a/OpenCL/m07300_a3-optimized.cl
+++ b/OpenCL/m07300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07300_a3-pure.cl b/OpenCL/m07300_a3-pure.cl
index bfdf6824c..d8ef2e43c 100644
--- a/OpenCL/m07300_a3-pure.cl
+++ b/OpenCL/m07300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl
index 0b56927cb..5d8cf6af0 100644
--- a/OpenCL/m07400-optimized.cl
+++ b/OpenCL/m07400-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #endif
diff --git a/OpenCL/m07400-pure.cl b/OpenCL/m07400-pure.cl
index b822dbdad..08cfee875 100644
--- a/OpenCL/m07400-pure.cl
+++ b/OpenCL/m07400-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #endif
diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl
index 028cca99b..4da89550a 100644
--- a/OpenCL/m07500_a0-optimized.cl
+++ b/OpenCL/m07500_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl
index 5540ccde5..46693cc3e 100644
--- a/OpenCL/m07500_a0-pure.cl
+++ b/OpenCL/m07500_a0-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl
index 7b0746d91..dc3ced845 100644
--- a/OpenCL/m07500_a1-optimized.cl
+++ b/OpenCL/m07500_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl
index 6ec806490..e4da228cd 100644
--- a/OpenCL/m07500_a1-pure.cl
+++ b/OpenCL/m07500_a1-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl
index 5f51f887e..404a66986 100644
--- a/OpenCL/m07500_a3-optimized.cl
+++ b/OpenCL/m07500_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl
index 02749c3e5..f22d7f07f 100644
--- a/OpenCL/m07500_a3-pure.cl
+++ b/OpenCL/m07500_a3-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl
index 5a530f8db..824307804 100644
--- a/OpenCL/m07700_a0-optimized.cl
+++ b/OpenCL/m07700_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl
index 86dd31267..46877fd70 100644
--- a/OpenCL/m07700_a1-optimized.cl
+++ b/OpenCL/m07700_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl
index a3f3d7a62..da3865e91 100644
--- a/OpenCL/m07700_a3-optimized.cl
+++ b/OpenCL/m07700_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl
index 840842fb0..e20c9c505 100644
--- a/OpenCL/m07701_a0-optimized.cl
+++ b/OpenCL/m07701_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl
index f73364987..e403114ab 100644
--- a/OpenCL/m07701_a1-optimized.cl
+++ b/OpenCL/m07701_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl
index b32c65f24..5dad95087 100644
--- a/OpenCL/m07701_a3-optimized.cl
+++ b/OpenCL/m07701_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m07800_a0-optimized.cl b/OpenCL/m07800_a0-optimized.cl
index 7c6951d9b..720d68ad0 100644
--- a/OpenCL/m07800_a0-optimized.cl
+++ b/OpenCL/m07800_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07800_a1-optimized.cl b/OpenCL/m07800_a1-optimized.cl
index 856e32bdd..b2c8358fb 100644
--- a/OpenCL/m07800_a1-optimized.cl
+++ b/OpenCL/m07800_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07800_a3-optimized.cl b/OpenCL/m07800_a3-optimized.cl
index 3e0363867..55065786d 100644
--- a/OpenCL/m07800_a3-optimized.cl
+++ b/OpenCL/m07800_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07801_a0-optimized.cl b/OpenCL/m07801_a0-optimized.cl
index 5a5b224a7..0ebdf2e9a 100644
--- a/OpenCL/m07801_a0-optimized.cl
+++ b/OpenCL/m07801_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m07801_a1-optimized.cl b/OpenCL/m07801_a1-optimized.cl
index 52fbf44c8..60829f3f7 100644
--- a/OpenCL/m07801_a1-optimized.cl
+++ b/OpenCL/m07801_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl
index 47765cbbb..0a346ca8d 100644
--- a/OpenCL/m07801_a3-optimized.cl
+++ b/OpenCL/m07801_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m07900-pure.cl b/OpenCL/m07900-pure.cl
index a6eed5cb5..32b381d33 100644
--- a/OpenCL/m07900-pure.cl
+++ b/OpenCL/m07900-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha512.cl"
 #endif
diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl
index 6057ef561..44452118c 100644
--- a/OpenCL/m08000_a0-optimized.cl
+++ b/OpenCL/m08000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl
index 3f597c2f2..49c745bc6 100644
--- a/OpenCL/m08000_a1-optimized.cl
+++ b/OpenCL/m08000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl
index 0209e9805..2542e648e 100644
--- a/OpenCL/m08000_a3-optimized.cl
+++ b/OpenCL/m08000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m08100_a0-optimized.cl b/OpenCL/m08100_a0-optimized.cl
index 5a3f1fb5b..95b650849 100644
--- a/OpenCL/m08100_a0-optimized.cl
+++ b/OpenCL/m08100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m08100_a0-pure.cl b/OpenCL/m08100_a0-pure.cl
index 09dfa8c41..d76790540 100644
--- a/OpenCL/m08100_a0-pure.cl
+++ b/OpenCL/m08100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m08100_a1-optimized.cl b/OpenCL/m08100_a1-optimized.cl
index 228e38c11..ccd25c064 100644
--- a/OpenCL/m08100_a1-optimized.cl
+++ b/OpenCL/m08100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08100_a1-pure.cl b/OpenCL/m08100_a1-pure.cl
index 041171d16..f67f51120 100644
--- a/OpenCL/m08100_a1-pure.cl
+++ b/OpenCL/m08100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08100_a3-optimized.cl b/OpenCL/m08100_a3-optimized.cl
index 076a10dcf..4e2f5e8ad 100644
--- a/OpenCL/m08100_a3-optimized.cl
+++ b/OpenCL/m08100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08100_a3-pure.cl b/OpenCL/m08100_a3-pure.cl
index 2582bc358..3d5208ad3 100644
--- a/OpenCL/m08100_a3-pure.cl
+++ b/OpenCL/m08100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08200-pure.cl b/OpenCL/m08200-pure.cl
index 9106b0d7d..96429c223 100644
--- a/OpenCL/m08200-pure.cl
+++ b/OpenCL/m08200-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m08300_a0-optimized.cl b/OpenCL/m08300_a0-optimized.cl
index 4591d7a24..d9461bd26 100644
--- a/OpenCL/m08300_a0-optimized.cl
+++ b/OpenCL/m08300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m08300_a0-pure.cl b/OpenCL/m08300_a0-pure.cl
index c0dfc084b..7bf1156fd 100644
--- a/OpenCL/m08300_a0-pure.cl
+++ b/OpenCL/m08300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m08300_a1-optimized.cl b/OpenCL/m08300_a1-optimized.cl
index 6f78de522..a669c739d 100644
--- a/OpenCL/m08300_a1-optimized.cl
+++ b/OpenCL/m08300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08300_a1-pure.cl b/OpenCL/m08300_a1-pure.cl
index 9958fc4e8..42135dec1 100644
--- a/OpenCL/m08300_a1-pure.cl
+++ b/OpenCL/m08300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08300_a3-optimized.cl b/OpenCL/m08300_a3-optimized.cl
index 18faae59b..0e9ad838f 100644
--- a/OpenCL/m08300_a3-optimized.cl
+++ b/OpenCL/m08300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08300_a3-pure.cl b/OpenCL/m08300_a3-pure.cl
index cac746e6d..4215d42a0 100644
--- a/OpenCL/m08300_a3-pure.cl
+++ b/OpenCL/m08300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl
index 6ca791374..c17b8e44b 100644
--- a/OpenCL/m08400_a0-optimized.cl
+++ b/OpenCL/m08400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl
index cd55c0d83..4c6294ef2 100644
--- a/OpenCL/m08400_a0-pure.cl
+++ b/OpenCL/m08400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl
index 37a2189cd..3eab32af2 100644
--- a/OpenCL/m08400_a1-optimized.cl
+++ b/OpenCL/m08400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl
index 5d994aab7..450dcab4a 100644
--- a/OpenCL/m08400_a1-pure.cl
+++ b/OpenCL/m08400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl
index 23474987f..e1788a974 100644
--- a/OpenCL/m08400_a3-optimized.cl
+++ b/OpenCL/m08400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl
index 1d495ade1..712d789ec 100644
--- a/OpenCL/m08400_a3-pure.cl
+++ b/OpenCL/m08400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl
index fcb84c9b0..69c768de3 100644
--- a/OpenCL/m08500_a0-pure.cl
+++ b/OpenCL/m08500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl
index 6eb3590b9..82df98f55 100644
--- a/OpenCL/m08500_a1-pure.cl
+++ b/OpenCL/m08500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl
index d50e4174d..277295e7d 100644
--- a/OpenCL/m08500_a3-pure.cl
+++ b/OpenCL/m08500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl
index 621f90fc3..22ac6cf46 100644
--- a/OpenCL/m08600_a0-pure.cl
+++ b/OpenCL/m08600_a0-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl
index 89ef057c8..859278a54 100644
--- a/OpenCL/m08600_a1-pure.cl
+++ b/OpenCL/m08600_a1-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl
index 3b579ac55..da94d4a3c 100644
--- a/OpenCL/m08600_a3-pure.cl
+++ b/OpenCL/m08600_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl
index bb18dbed7..022281919 100644
--- a/OpenCL/m08700_a0-optimized.cl
+++ b/OpenCL/m08700_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl
index 8f4c3e33a..94968476f 100644
--- a/OpenCL/m08700_a1-optimized.cl
+++ b/OpenCL/m08700_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl
index bb05bf13f..694a58886 100644
--- a/OpenCL/m08700_a3-optimized.cl
+++ b/OpenCL/m08700_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl
index b4ef1c066..59c14e6ee 100644
--- a/OpenCL/m08800-pure.cl
+++ b/OpenCL/m08800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl
index dfec8b535..a2b476e97 100644
--- a/OpenCL/m08900-pure.cl
+++ b/OpenCL/m08900-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #endif
diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl
index e9bce68b7..ec34faab2 100644
--- a/OpenCL/m09000-pure.cl
+++ b/OpenCL/m09000-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #endif
diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl
index 997b611bd..51ae77536 100644
--- a/OpenCL/m09100-pure.cl
+++ b/OpenCL/m09100-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09400-pure.cl b/OpenCL/m09400-pure.cl
index a59d38007..3da6fd261 100644
--- a/OpenCL/m09400-pure.cl
+++ b/OpenCL/m09400-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09500-pure.cl b/OpenCL/m09500-pure.cl
index 72bae3d63..20ca9ea38 100644
--- a/OpenCL/m09500-pure.cl
+++ b/OpenCL/m09500-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09600-pure.cl b/OpenCL/m09600-pure.cl
index edbe62eba..94f47dc00 100644
--- a/OpenCL/m09600-pure.cl
+++ b/OpenCL/m09600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m09700_a0-optimized.cl b/OpenCL/m09700_a0-optimized.cl
index 7a5adff36..70cdd577c 100644
--- a/OpenCL/m09700_a0-optimized.cl
+++ b/OpenCL/m09700_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09700_a1-optimized.cl b/OpenCL/m09700_a1-optimized.cl
index cfef5f69f..088f05c28 100644
--- a/OpenCL/m09700_a1-optimized.cl
+++ b/OpenCL/m09700_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl
index 1d75f0d44..6414b53a0 100644
--- a/OpenCL/m09700_a3-optimized.cl
+++ b/OpenCL/m09700_a3-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09710_a0-optimized.cl b/OpenCL/m09710_a0-optimized.cl
index 6caaee208..33c218185 100644
--- a/OpenCL/m09710_a0-optimized.cl
+++ b/OpenCL/m09710_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09710_a1-optimized.cl b/OpenCL/m09710_a1-optimized.cl
index 7d55a52a1..fc6ff0cd7 100644
--- a/OpenCL/m09710_a1-optimized.cl
+++ b/OpenCL/m09710_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl
index bc3b3127a..fb3df2602 100644
--- a/OpenCL/m09710_a3-optimized.cl
+++ b/OpenCL/m09710_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09720_a0-optimized.cl b/OpenCL/m09720_a0-optimized.cl
index f304a0dfb..e661620af 100644
--- a/OpenCL/m09720_a0-optimized.cl
+++ b/OpenCL/m09720_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09720_a1-optimized.cl b/OpenCL/m09720_a1-optimized.cl
index a1a58e9a5..d1eabb20c 100644
--- a/OpenCL/m09720_a1-optimized.cl
+++ b/OpenCL/m09720_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09720_a3-optimized.cl b/OpenCL/m09720_a3-optimized.cl
index 57c39df18..1a8663d33 100644
--- a/OpenCL/m09720_a3-optimized.cl
+++ b/OpenCL/m09720_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl
index 0ab96faab..3646b66eb 100644
--- a/OpenCL/m09800_a0-optimized.cl
+++ b/OpenCL/m09800_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl
index 278684d0a..becffa65e 100644
--- a/OpenCL/m09800_a1-optimized.cl
+++ b/OpenCL/m09800_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl
index 1ab9a6d9a..07f9c09a5 100644
--- a/OpenCL/m09800_a3-optimized.cl
+++ b/OpenCL/m09800_a3-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl
index 69e637264..e5ddf247a 100644
--- a/OpenCL/m09810_a0-optimized.cl
+++ b/OpenCL/m09810_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl
index 685395dad..ce654900d 100644
--- a/OpenCL/m09810_a1-optimized.cl
+++ b/OpenCL/m09810_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl
index 0eff00807..360170bbe 100644
--- a/OpenCL/m09810_a3-optimized.cl
+++ b/OpenCL/m09810_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09820_a0-optimized.cl b/OpenCL/m09820_a0-optimized.cl
index cfb6bdf6c..0e9a95ca6 100644
--- a/OpenCL/m09820_a0-optimized.cl
+++ b/OpenCL/m09820_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09820_a1-optimized.cl b/OpenCL/m09820_a1-optimized.cl
index 584491816..028b3d28f 100644
--- a/OpenCL/m09820_a1-optimized.cl
+++ b/OpenCL/m09820_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09820_a3-optimized.cl b/OpenCL/m09820_a3-optimized.cl
index c17b11e46..cde918d0e 100644
--- a/OpenCL/m09820_a3-optimized.cl
+++ b/OpenCL/m09820_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m09900_a0-optimized.cl b/OpenCL/m09900_a0-optimized.cl
index 31db1fa4b..3c4cf10ff 100644
--- a/OpenCL/m09900_a0-optimized.cl
+++ b/OpenCL/m09900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m09900_a0-pure.cl b/OpenCL/m09900_a0-pure.cl
index 64976e835..04347dfc9 100644
--- a/OpenCL/m09900_a0-pure.cl
+++ b/OpenCL/m09900_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m09900_a1-optimized.cl b/OpenCL/m09900_a1-optimized.cl
index bdac4c177..07f62d547 100644
--- a/OpenCL/m09900_a1-optimized.cl
+++ b/OpenCL/m09900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09900_a1-pure.cl b/OpenCL/m09900_a1-pure.cl
index 2313b9bf9..367aa5755 100644
--- a/OpenCL/m09900_a1-pure.cl
+++ b/OpenCL/m09900_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09900_a3-optimized.cl b/OpenCL/m09900_a3-optimized.cl
index 6f56f1703..e0145b106 100644
--- a/OpenCL/m09900_a3-optimized.cl
+++ b/OpenCL/m09900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m09900_a3-pure.cl b/OpenCL/m09900_a3-pure.cl
index 4cad825f4..df6e1f55f 100644
--- a/OpenCL/m09900_a3-pure.cl
+++ b/OpenCL/m09900_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10100_a0-optimized.cl b/OpenCL/m10100_a0-optimized.cl
index 8ed585765..593fb98fe 100644
--- a/OpenCL/m10100_a0-optimized.cl
+++ b/OpenCL/m10100_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m10100_a1-optimized.cl b/OpenCL/m10100_a1-optimized.cl
index 61d40e2b3..b18e584ca 100644
--- a/OpenCL/m10100_a1-optimized.cl
+++ b/OpenCL/m10100_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m10100_a3-optimized.cl b/OpenCL/m10100_a3-optimized.cl
index e45ac8f51..c3269014b 100644
--- a/OpenCL/m10100_a3-optimized.cl
+++ b/OpenCL/m10100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m10300-pure.cl b/OpenCL/m10300-pure.cl
index 6c3b254ee..dc9b31e26 100644
--- a/OpenCL/m10300-pure.cl
+++ b/OpenCL/m10300-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #endif
diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl
index b91746073..4da62a07d 100644
--- a/OpenCL/m10400_a0-optimized.cl
+++ b/OpenCL/m10400_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl
index 22e1a116f..f1662f380 100644
--- a/OpenCL/m10400_a1-optimized.cl
+++ b/OpenCL/m10400_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl
index eb6020fc4..1182a6067 100644
--- a/OpenCL/m10400_a3-optimized.cl
+++ b/OpenCL/m10400_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl
index 5f3212a29..5bbc1c9d7 100644
--- a/OpenCL/m10410_a0-optimized.cl
+++ b/OpenCL/m10410_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl
index b0229a349..b480d6321 100644
--- a/OpenCL/m10410_a1-optimized.cl
+++ b/OpenCL/m10410_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl
index 399672cb3..6bd44c809 100644
--- a/OpenCL/m10410_a3-optimized.cl
+++ b/OpenCL/m10410_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10420_a0-optimized.cl b/OpenCL/m10420_a0-optimized.cl
index d02749446..2766e028a 100644
--- a/OpenCL/m10420_a0-optimized.cl
+++ b/OpenCL/m10420_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m10420_a1-optimized.cl b/OpenCL/m10420_a1-optimized.cl
index cbde1f143..3fe2fba4e 100644
--- a/OpenCL/m10420_a1-optimized.cl
+++ b/OpenCL/m10420_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10420_a3-optimized.cl b/OpenCL/m10420_a3-optimized.cl
index 815e59a15..47d0f1252 100644
--- a/OpenCL/m10420_a3-optimized.cl
+++ b/OpenCL/m10420_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl
index 3ba4e85a4..ef270689a 100644
--- a/OpenCL/m10500-pure.cl
+++ b/OpenCL/m10500-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index 8093d2f7a..5925656f1 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl
index 284a8ca49..c302be978 100644
--- a/OpenCL/m10700-pure.cl
+++ b/OpenCL/m10700-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl
index aaf1a8bd1..e9cfd2167 100644
--- a/OpenCL/m10800_a0-optimized.cl
+++ b/OpenCL/m10800_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m10800_a0-pure.cl b/OpenCL/m10800_a0-pure.cl
index 023bb9b50..ec46261db 100644
--- a/OpenCL/m10800_a0-pure.cl
+++ b/OpenCL/m10800_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m10800_a1-optimized.cl b/OpenCL/m10800_a1-optimized.cl
index 9bc1b42bd..f7828aa9f 100644
--- a/OpenCL/m10800_a1-optimized.cl
+++ b/OpenCL/m10800_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10800_a1-pure.cl b/OpenCL/m10800_a1-pure.cl
index 5a8a1c2a2..0f5b7abeb 100644
--- a/OpenCL/m10800_a1-pure.cl
+++ b/OpenCL/m10800_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl
index e2291000c..f19e1b224 100644
--- a/OpenCL/m10800_a3-optimized.cl
+++ b/OpenCL/m10800_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10800_a3-pure.cl b/OpenCL/m10800_a3-pure.cl
index 940e5a61c..8c2fe7138 100644
--- a/OpenCL/m10800_a3-pure.cl
+++ b/OpenCL/m10800_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m10900-pure.cl b/OpenCL/m10900-pure.cl
index 19613c2ca..b2fb8bacc 100644
--- a/OpenCL/m10900-pure.cl
+++ b/OpenCL/m10900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m11000_a0-optimized.cl b/OpenCL/m11000_a0-optimized.cl
index ba14d4fd0..b569c506b 100644
--- a/OpenCL/m11000_a0-optimized.cl
+++ b/OpenCL/m11000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11000_a0-pure.cl b/OpenCL/m11000_a0-pure.cl
index 89af491eb..9e44e7173 100644
--- a/OpenCL/m11000_a0-pure.cl
+++ b/OpenCL/m11000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11000_a1-optimized.cl b/OpenCL/m11000_a1-optimized.cl
index e0672c0be..d41899d96 100644
--- a/OpenCL/m11000_a1-optimized.cl
+++ b/OpenCL/m11000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11000_a1-pure.cl b/OpenCL/m11000_a1-pure.cl
index e5c5edb2e..c51d23374 100644
--- a/OpenCL/m11000_a1-pure.cl
+++ b/OpenCL/m11000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11000_a3-optimized.cl b/OpenCL/m11000_a3-optimized.cl
index 9ac1f844d..357af7a59 100644
--- a/OpenCL/m11000_a3-optimized.cl
+++ b/OpenCL/m11000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11000_a3-pure.cl b/OpenCL/m11000_a3-pure.cl
index 63c36625c..00f90f8b7 100644
--- a/OpenCL/m11000_a3-pure.cl
+++ b/OpenCL/m11000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl
index 7fe39fc4f..1070b79f4 100644
--- a/OpenCL/m11100_a0-optimized.cl
+++ b/OpenCL/m11100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl
index 8e3ca5378..70d5554e3 100644
--- a/OpenCL/m11100_a0-pure.cl
+++ b/OpenCL/m11100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl
index bd89d18d9..87eff0525 100644
--- a/OpenCL/m11100_a1-optimized.cl
+++ b/OpenCL/m11100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl
index a870a76f8..6a41a9337 100644
--- a/OpenCL/m11100_a1-pure.cl
+++ b/OpenCL/m11100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl
index 190917c48..020f60870 100644
--- a/OpenCL/m11100_a3-optimized.cl
+++ b/OpenCL/m11100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl
index 9b1ef9e5c..e45643862 100644
--- a/OpenCL/m11100_a3-pure.cl
+++ b/OpenCL/m11100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11200_a0-optimized.cl b/OpenCL/m11200_a0-optimized.cl
index 2081da572..f3f09585a 100644
--- a/OpenCL/m11200_a0-optimized.cl
+++ b/OpenCL/m11200_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11200_a0-pure.cl b/OpenCL/m11200_a0-pure.cl
index 71e247607..c89b16362 100644
--- a/OpenCL/m11200_a0-pure.cl
+++ b/OpenCL/m11200_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11200_a1-optimized.cl b/OpenCL/m11200_a1-optimized.cl
index 0e6e3e2cd..62dda9028 100644
--- a/OpenCL/m11200_a1-optimized.cl
+++ b/OpenCL/m11200_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m11200_a1-pure.cl b/OpenCL/m11200_a1-pure.cl
index 12ac6b1ff..173c3a127 100644
--- a/OpenCL/m11200_a1-pure.cl
+++ b/OpenCL/m11200_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m11200_a3-optimized.cl b/OpenCL/m11200_a3-optimized.cl
index 34d62f10c..7f012eaa7 100644
--- a/OpenCL/m11200_a3-optimized.cl
+++ b/OpenCL/m11200_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m11200_a3-pure.cl b/OpenCL/m11200_a3-pure.cl
index 7d9a898b8..4f63a5f52 100644
--- a/OpenCL/m11200_a3-pure.cl
+++ b/OpenCL/m11200_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl
index 8cf78d701..0bd10ab73 100644
--- a/OpenCL/m11300-pure.cl
+++ b/OpenCL/m11300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl
index b57c7c20b..76bcdf904 100644
--- a/OpenCL/m11400_a0-pure.cl
+++ b/OpenCL/m11400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl
index b77777ca5..7059d4686 100644
--- a/OpenCL/m11400_a1-pure.cl
+++ b/OpenCL/m11400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl
index 6b814d216..aecf5fd78 100644
--- a/OpenCL/m11400_a3-pure.cl
+++ b/OpenCL/m11400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m11500_a0-optimized.cl b/OpenCL/m11500_a0-optimized.cl
index bcea197f0..903190d74 100644
--- a/OpenCL/m11500_a0-optimized.cl
+++ b/OpenCL/m11500_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11500_a1-optimized.cl b/OpenCL/m11500_a1-optimized.cl
index bede0d4f1..6ee1b5b6c 100644
--- a/OpenCL/m11500_a1-optimized.cl
+++ b/OpenCL/m11500_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl
index 70af7777a..2fd20e7bf 100644
--- a/OpenCL/m11500_a3-optimized.cl
+++ b/OpenCL/m11500_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl
index ea19df0cb..545a0545c 100644
--- a/OpenCL/m11600-pure.cl
+++ b/OpenCL/m11600-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #endif
diff --git a/OpenCL/m11700_a0-optimized.cl b/OpenCL/m11700_a0-optimized.cl
index 3dc636ebe..736355ac8 100644
--- a/OpenCL/m11700_a0-optimized.cl
+++ b/OpenCL/m11700_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11700_a0-pure.cl b/OpenCL/m11700_a0-pure.cl
index 20461ae49..51daa55e4 100644
--- a/OpenCL/m11700_a0-pure.cl
+++ b/OpenCL/m11700_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11700_a1-optimized.cl b/OpenCL/m11700_a1-optimized.cl
index 846c25d36..cf0b15b75 100644
--- a/OpenCL/m11700_a1-optimized.cl
+++ b/OpenCL/m11700_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11700_a1-pure.cl b/OpenCL/m11700_a1-pure.cl
index 8d37a83ed..961c6f91a 100644
--- a/OpenCL/m11700_a1-pure.cl
+++ b/OpenCL/m11700_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl
index 45baeb97a..a63537bad 100644
--- a/OpenCL/m11700_a3-optimized.cl
+++ b/OpenCL/m11700_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11700_a3-pure.cl b/OpenCL/m11700_a3-pure.cl
index 6261703a8..468d149ce 100644
--- a/OpenCL/m11700_a3-pure.cl
+++ b/OpenCL/m11700_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11750_a0-pure.cl b/OpenCL/m11750_a0-pure.cl
index 6e4f071db..398cd4271 100644
--- a/OpenCL/m11750_a0-pure.cl
+++ b/OpenCL/m11750_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11750_a1-pure.cl b/OpenCL/m11750_a1-pure.cl
index 98f023f6b..010ba1439 100644
--- a/OpenCL/m11750_a1-pure.cl
+++ b/OpenCL/m11750_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11750_a3-pure.cl b/OpenCL/m11750_a3-pure.cl
index d1e989c82..f534c1e7c 100644
--- a/OpenCL/m11750_a3-pure.cl
+++ b/OpenCL/m11750_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11760_a0-pure.cl b/OpenCL/m11760_a0-pure.cl
index 787c21ac7..e0a9c80b5 100644
--- a/OpenCL/m11760_a0-pure.cl
+++ b/OpenCL/m11760_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11760_a1-pure.cl b/OpenCL/m11760_a1-pure.cl
index 62bae3ee6..e29eab43b 100644
--- a/OpenCL/m11760_a1-pure.cl
+++ b/OpenCL/m11760_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11760_a3-pure.cl b/OpenCL/m11760_a3-pure.cl
index 0ec476ac7..ad3927051 100644
--- a/OpenCL/m11760_a3-pure.cl
+++ b/OpenCL/m11760_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog256.cl"
diff --git a/OpenCL/m11800_a0-optimized.cl b/OpenCL/m11800_a0-optimized.cl
index 2c4518667..969da8abb 100644
--- a/OpenCL/m11800_a0-optimized.cl
+++ b/OpenCL/m11800_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m11800_a0-pure.cl b/OpenCL/m11800_a0-pure.cl
index 41315fc64..f688af4a3 100644
--- a/OpenCL/m11800_a0-pure.cl
+++ b/OpenCL/m11800_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11800_a1-optimized.cl b/OpenCL/m11800_a1-optimized.cl
index ba0843fa2..9b469e958 100644
--- a/OpenCL/m11800_a1-optimized.cl
+++ b/OpenCL/m11800_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11800_a1-pure.cl b/OpenCL/m11800_a1-pure.cl
index 8f05197ef..d65ed1932 100644
--- a/OpenCL/m11800_a1-pure.cl
+++ b/OpenCL/m11800_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl
index f9538252f..8ccf6bac4 100644
--- a/OpenCL/m11800_a3-optimized.cl
+++ b/OpenCL/m11800_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11800_a3-pure.cl b/OpenCL/m11800_a3-pure.cl
index 963d004db..e687e5ac8 100644
--- a/OpenCL/m11800_a3-pure.cl
+++ b/OpenCL/m11800_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11850_a0-pure.cl b/OpenCL/m11850_a0-pure.cl
index db98c9529..ec7347b97 100644
--- a/OpenCL/m11850_a0-pure.cl
+++ b/OpenCL/m11850_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11850_a1-pure.cl b/OpenCL/m11850_a1-pure.cl
index e7eac3084..23cb79a7c 100644
--- a/OpenCL/m11850_a1-pure.cl
+++ b/OpenCL/m11850_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11850_a3-pure.cl b/OpenCL/m11850_a3-pure.cl
index 91e2da87f..bb9295311 100644
--- a/OpenCL/m11850_a3-pure.cl
+++ b/OpenCL/m11850_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11860_a0-pure.cl b/OpenCL/m11860_a0-pure.cl
index 318c87fdf..38b7e80fe 100644
--- a/OpenCL/m11860_a0-pure.cl
+++ b/OpenCL/m11860_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m11860_a1-pure.cl b/OpenCL/m11860_a1-pure.cl
index e7880e570..698365296 100644
--- a/OpenCL/m11860_a1-pure.cl
+++ b/OpenCL/m11860_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11860_a3-pure.cl b/OpenCL/m11860_a3-pure.cl
index 1d920283b..e1b70ecf2 100644
--- a/OpenCL/m11860_a3-pure.cl
+++ b/OpenCL/m11860_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m11900-pure.cl b/OpenCL/m11900-pure.cl
index b46cca1cb..4fdac642d 100644
--- a/OpenCL/m11900-pure.cl
+++ b/OpenCL/m11900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m12000-pure.cl b/OpenCL/m12000-pure.cl
index 43fc2ac54..4a2d4f176 100644
--- a/OpenCL/m12000-pure.cl
+++ b/OpenCL/m12000-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12200-pure.cl b/OpenCL/m12200-pure.cl
index 3aee89aa0..2e73aa0db 100644
--- a/OpenCL/m12200-pure.cl
+++ b/OpenCL/m12200-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m12300-pure.cl b/OpenCL/m12300-pure.cl
index 4b84dcab0..ae92645e7 100644
--- a/OpenCL/m12300-pure.cl
+++ b/OpenCL/m12300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl
index e7bc2e27e..4c3cf8fe7 100644
--- a/OpenCL/m12400-pure.cl
+++ b/OpenCL/m12400-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl
index 99920ff71..d07c43cb1 100644
--- a/OpenCL/m12500-pure.cl
+++ b/OpenCL/m12500-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #include "inc_cipher_aes.cl"
diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl
index 60a9461cd..75a91fbb7 100644
--- a/OpenCL/m12600_a0-optimized.cl
+++ b/OpenCL/m12600_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl
index 66ca371dc..8f7dcc23c 100644
--- a/OpenCL/m12600_a0-pure.cl
+++ b/OpenCL/m12600_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl
index ea5805154..c2b5796bf 100644
--- a/OpenCL/m12600_a1-optimized.cl
+++ b/OpenCL/m12600_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl
index 3b2730f66..6191cbe79 100644
--- a/OpenCL/m12600_a1-pure.cl
+++ b/OpenCL/m12600_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl
index 15ec957ac..267de8f0d 100644
--- a/OpenCL/m12600_a3-optimized.cl
+++ b/OpenCL/m12600_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl
index bf9cfb7f0..a335c7330 100644
--- a/OpenCL/m12600_a3-pure.cl
+++ b/OpenCL/m12600_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12700-pure.cl b/OpenCL/m12700-pure.cl
index 4a0c72d11..a8b5710ce 100644
--- a/OpenCL/m12700-pure.cl
+++ b/OpenCL/m12700-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl
index 52318e9ce..28887af9d 100644
--- a/OpenCL/m12800-pure.cl
+++ b/OpenCL/m12800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m12900-pure.cl b/OpenCL/m12900-pure.cl
index 095eaae52..b575ad561 100644
--- a/OpenCL/m12900-pure.cl
+++ b/OpenCL/m12900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13000-pure.cl b/OpenCL/m13000-pure.cl
index a9b56c1c4..993fdba1d 100644
--- a/OpenCL/m13000-pure.cl
+++ b/OpenCL/m13000-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl
index a42ff481e..c3c972c4f 100644
--- a/OpenCL/m13100_a0-optimized.cl
+++ b/OpenCL/m13100_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl
index 77e57e970..ef875c481 100644
--- a/OpenCL/m13100_a0-pure.cl
+++ b/OpenCL/m13100_a0-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl
index c153b1033..48bb36712 100644
--- a/OpenCL/m13100_a1-optimized.cl
+++ b/OpenCL/m13100_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl
index c84fab54e..0c770b0fe 100644
--- a/OpenCL/m13100_a1-pure.cl
+++ b/OpenCL/m13100_a1-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl
index 81b94bf9b..89d850968 100644
--- a/OpenCL/m13100_a3-optimized.cl
+++ b/OpenCL/m13100_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl
index a6be7319a..9ce8b4617 100644
--- a/OpenCL/m13100_a3-pure.cl
+++ b/OpenCL/m13100_a3-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl
index 0ade009bd..1163d0d67 100644
--- a/OpenCL/m13200-pure.cl
+++ b/OpenCL/m13200-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha1.cl"
 #include "inc_cipher_aes.cl"
diff --git a/OpenCL/m13300_a0-optimized.cl b/OpenCL/m13300_a0-optimized.cl
index 42cca5d27..8fb4398ad 100644
--- a/OpenCL/m13300_a0-optimized.cl
+++ b/OpenCL/m13300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m13300_a0-pure.cl b/OpenCL/m13300_a0-pure.cl
index 9a35265d6..7e91986fa 100644
--- a/OpenCL/m13300_a0-pure.cl
+++ b/OpenCL/m13300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m13300_a1-optimized.cl b/OpenCL/m13300_a1-optimized.cl
index 547119476..450a66cf2 100644
--- a/OpenCL/m13300_a1-optimized.cl
+++ b/OpenCL/m13300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13300_a1-pure.cl b/OpenCL/m13300_a1-pure.cl
index a8e24f994..106027d74 100644
--- a/OpenCL/m13300_a1-pure.cl
+++ b/OpenCL/m13300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13300_a3-optimized.cl b/OpenCL/m13300_a3-optimized.cl
index 4e748dbcd..2b3f91812 100644
--- a/OpenCL/m13300_a3-optimized.cl
+++ b/OpenCL/m13300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13300_a3-pure.cl b/OpenCL/m13300_a3-pure.cl
index 4657b5eb0..924c4165a 100644
--- a/OpenCL/m13300_a3-pure.cl
+++ b/OpenCL/m13300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13400-pure.cl b/OpenCL/m13400-pure.cl
index 5504ac07c..420d303cf 100644
--- a/OpenCL/m13400-pure.cl
+++ b/OpenCL/m13400-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #include "inc_cipher_aes.cl"
diff --git a/OpenCL/m13500_a0-optimized.cl b/OpenCL/m13500_a0-optimized.cl
index bd3268abf..11f1c1805 100644
--- a/OpenCL/m13500_a0-optimized.cl
+++ b/OpenCL/m13500_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m13500_a0-pure.cl b/OpenCL/m13500_a0-pure.cl
index 4c2d6e80c..b8102e111 100644
--- a/OpenCL/m13500_a0-pure.cl
+++ b/OpenCL/m13500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m13500_a1-optimized.cl b/OpenCL/m13500_a1-optimized.cl
index ddaa67441..1df3d887a 100644
--- a/OpenCL/m13500_a1-optimized.cl
+++ b/OpenCL/m13500_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13500_a1-pure.cl b/OpenCL/m13500_a1-pure.cl
index 0e9bc3475..8443f38ca 100644
--- a/OpenCL/m13500_a1-pure.cl
+++ b/OpenCL/m13500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13500_a3-optimized.cl b/OpenCL/m13500_a3-optimized.cl
index 7ab0f7f89..ce33398f5 100644
--- a/OpenCL/m13500_a3-optimized.cl
+++ b/OpenCL/m13500_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13500_a3-pure.cl b/OpenCL/m13500_a3-pure.cl
index 8c0cf3d5d..19865585b 100644
--- a/OpenCL/m13500_a3-pure.cl
+++ b/OpenCL/m13500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13600-pure.cl b/OpenCL/m13600-pure.cl
index 4846a3c27..944215e82 100644
--- a/OpenCL/m13600-pure.cl
+++ b/OpenCL/m13600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl
index e46f64018..9fea1d790 100644
--- a/OpenCL/m13711-pure.cl
+++ b/OpenCL/m13711-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl
index 09214eb6c..ee75b7170 100644
--- a/OpenCL/m13712-pure.cl
+++ b/OpenCL/m13712-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl
index 18ec22e13..c9af26c73 100644
--- a/OpenCL/m13713-pure.cl
+++ b/OpenCL/m13713-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_ripemd160.cl"
diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl
index 1a58a3feb..2dbcab437 100644
--- a/OpenCL/m13721-pure.cl
+++ b/OpenCL/m13721-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl
index ebb5d377a..d5dff2be6 100644
--- a/OpenCL/m13722-pure.cl
+++ b/OpenCL/m13722-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl
index ce6924637..6d895fe4f 100644
--- a/OpenCL/m13723-pure.cl
+++ b/OpenCL/m13723-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl
index 33fcd4ad8..98a395b95 100644
--- a/OpenCL/m13731-pure.cl
+++ b/OpenCL/m13731-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl
index ac382dcb8..25b544d59 100644
--- a/OpenCL/m13732-pure.cl
+++ b/OpenCL/m13732-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl
index 122fd306a..083b29cdb 100644
--- a/OpenCL/m13733-pure.cl
+++ b/OpenCL/m13733-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_whirlpool.cl"
diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl
index 528b608b9..33132d538 100644
--- a/OpenCL/m13751-pure.cl
+++ b/OpenCL/m13751-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl
index 421eca8bb..022a35d4e 100644
--- a/OpenCL/m13752-pure.cl
+++ b/OpenCL/m13752-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl
index ce0021da5..2f704e2ab 100644
--- a/OpenCL/m13753-pure.cl
+++ b/OpenCL/m13753-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl
index eeb767e63..bb03c67f6 100644
--- a/OpenCL/m13771-pure.cl
+++ b/OpenCL/m13771-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl
index 80882acd3..98598e694 100644
--- a/OpenCL/m13772-pure.cl
+++ b/OpenCL/m13772-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl
index beb700902..5bfae6e2d 100644
--- a/OpenCL/m13773-pure.cl
+++ b/OpenCL/m13773-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_streebog512.cl"
diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl
index f2be74613..7df76fae4 100644
--- a/OpenCL/m13800_a0-optimized.cl
+++ b/OpenCL/m13800_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m13800_a0-pure.cl b/OpenCL/m13800_a0-pure.cl
index 3546c04ef..1a4061dd3 100644
--- a/OpenCL/m13800_a0-pure.cl
+++ b/OpenCL/m13800_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl
index 75777a4c2..eaf6d0f58 100644
--- a/OpenCL/m13800_a1-optimized.cl
+++ b/OpenCL/m13800_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13800_a1-pure.cl b/OpenCL/m13800_a1-pure.cl
index 7e042d817..4bcc63dcc 100644
--- a/OpenCL/m13800_a1-pure.cl
+++ b/OpenCL/m13800_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl
index 4a30aec4a..8848869a0 100644
--- a/OpenCL/m13800_a3-optimized.cl
+++ b/OpenCL/m13800_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13800_a3-pure.cl b/OpenCL/m13800_a3-pure.cl
index cd2877406..87ac2a02a 100644
--- a/OpenCL/m13800_a3-pure.cl
+++ b/OpenCL/m13800_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl
index 72ce8024b..5477b7b35 100644
--- a/OpenCL/m13900_a0-optimized.cl
+++ b/OpenCL/m13900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl
index dae69122d..050384494 100644
--- a/OpenCL/m13900_a0-pure.cl
+++ b/OpenCL/m13900_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl
index 7a5214719..d9a53dd41 100644
--- a/OpenCL/m13900_a1-optimized.cl
+++ b/OpenCL/m13900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl
index 1b4c8c6ce..3435b5dbf 100644
--- a/OpenCL/m13900_a1-pure.cl
+++ b/OpenCL/m13900_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl
index e446de3db..b02da1e95 100644
--- a/OpenCL/m13900_a3-optimized.cl
+++ b/OpenCL/m13900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl
index 7f8b5246d..73eadd5ca 100644
--- a/OpenCL/m13900_a3-pure.cl
+++ b/OpenCL/m13900_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl
index d99c0eda2..503e26d28 100644
--- a/OpenCL/m14000_a0-pure.cl
+++ b/OpenCL/m14000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl
index 4b85567a8..90954adae 100644
--- a/OpenCL/m14000_a1-pure.cl
+++ b/OpenCL/m14000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl
index 1e9b51d26..6cfaee1db 100644
--- a/OpenCL/m14000_a3-pure.cl
+++ b/OpenCL/m14000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #endif
 
diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl
index 1efcaee4a..17d5bed9d 100644
--- a/OpenCL/m14100_a0-pure.cl
+++ b/OpenCL/m14100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl
index d1a9465c1..3eba5678c 100644
--- a/OpenCL/m14100_a1-pure.cl
+++ b/OpenCL/m14100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl
index 4414be3e0..616d944d3 100644
--- a/OpenCL/m14100_a3-pure.cl
+++ b/OpenCL/m14100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl
index 1cfb306b2..9e24a41e0 100644
--- a/OpenCL/m14400_a0-optimized.cl
+++ b/OpenCL/m14400_a0-optimized.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl
index fc7a9350a..51df83735 100644
--- a/OpenCL/m14400_a0-pure.cl
+++ b/OpenCL/m14400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl
index d35895338..0d0376a4a 100644
--- a/OpenCL/m14400_a1-optimized.cl
+++ b/OpenCL/m14400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl
index c90002326..822fa5248 100644
--- a/OpenCL/m14400_a1-pure.cl
+++ b/OpenCL/m14400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl
index 6222f1214..47d23112a 100644
--- a/OpenCL/m14400_a3-optimized.cl
+++ b/OpenCL/m14400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl
index 4dd93ffaf..97cbb31ca 100644
--- a/OpenCL/m14400_a3-pure.cl
+++ b/OpenCL/m14400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl
index 1785ceb3a..02f12ab4f 100644
--- a/OpenCL/m14611-pure.cl
+++ b/OpenCL/m14611-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14612-pure.cl b/OpenCL/m14612-pure.cl
index b131dfa01..1ba6880c9 100644
--- a/OpenCL/m14612-pure.cl
+++ b/OpenCL/m14612-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14613-pure.cl b/OpenCL/m14613-pure.cl
index f3e07ee90..de1ee77b3 100644
--- a/OpenCL/m14613-pure.cl
+++ b/OpenCL/m14613-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl
index 11ea28861..deec44318 100644
--- a/OpenCL/m14621-pure.cl
+++ b/OpenCL/m14621-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14622-pure.cl b/OpenCL/m14622-pure.cl
index 0d136b0c8..565060adc 100644
--- a/OpenCL/m14622-pure.cl
+++ b/OpenCL/m14622-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14623-pure.cl b/OpenCL/m14623-pure.cl
index da5a03de2..81066c19b 100644
--- a/OpenCL/m14623-pure.cl
+++ b/OpenCL/m14623-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl
index 2570b5a44..8b65f8975 100644
--- a/OpenCL/m14631-pure.cl
+++ b/OpenCL/m14631-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14632-pure.cl b/OpenCL/m14632-pure.cl
index 7e036f33c..0e097ba13 100644
--- a/OpenCL/m14632-pure.cl
+++ b/OpenCL/m14632-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14633-pure.cl b/OpenCL/m14633-pure.cl
index 976be1605..c019f23f0 100644
--- a/OpenCL/m14633-pure.cl
+++ b/OpenCL/m14633-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl
index 3deef6114..26ba30663 100644
--- a/OpenCL/m14641-pure.cl
+++ b/OpenCL/m14641-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14642-pure.cl b/OpenCL/m14642-pure.cl
index ca8459c58..b5fd5441a 100644
--- a/OpenCL/m14642-pure.cl
+++ b/OpenCL/m14642-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14643-pure.cl b/OpenCL/m14643-pure.cl
index 4fbb4df32..34ab7d4d1 100644
--- a/OpenCL/m14643-pure.cl
+++ b/OpenCL/m14643-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl
index df90ba694..bf594f354 100644
--- a/OpenCL/m14700-pure.cl
+++ b/OpenCL/m14700-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl
index bb5fb88fc..106c0ee48 100644
--- a/OpenCL/m14800-pure.cl
+++ b/OpenCL/m14800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl
index 5f015d28f..f8946baa1 100644
--- a/OpenCL/m14900_a0-optimized.cl
+++ b/OpenCL/m14900_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl
index c6b740235..f672860fe 100644
--- a/OpenCL/m14900_a1-optimized.cl
+++ b/OpenCL/m14900_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl
index cefd7cb98..7255cfcec 100644
--- a/OpenCL/m14900_a3-optimized.cl
+++ b/OpenCL/m14900_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m15000_a0-optimized.cl b/OpenCL/m15000_a0-optimized.cl
index 428997e98..a4fe67e48 100644
--- a/OpenCL/m15000_a0-optimized.cl
+++ b/OpenCL/m15000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m15000_a0-pure.cl b/OpenCL/m15000_a0-pure.cl
index eacbad4d3..9591d5555 100644
--- a/OpenCL/m15000_a0-pure.cl
+++ b/OpenCL/m15000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m15000_a1-optimized.cl b/OpenCL/m15000_a1-optimized.cl
index 7257d2c19..e410b3102 100644
--- a/OpenCL/m15000_a1-optimized.cl
+++ b/OpenCL/m15000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m15000_a1-pure.cl b/OpenCL/m15000_a1-pure.cl
index 54a3c6d98..6ec8d1576 100644
--- a/OpenCL/m15000_a1-pure.cl
+++ b/OpenCL/m15000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m15000_a3-optimized.cl b/OpenCL/m15000_a3-optimized.cl
index 0021f624d..5ff6d7bfb 100644
--- a/OpenCL/m15000_a3-optimized.cl
+++ b/OpenCL/m15000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m15000_a3-pure.cl b/OpenCL/m15000_a3-pure.cl
index a15aa67d6..9e652f284 100644
--- a/OpenCL/m15000_a3-pure.cl
+++ b/OpenCL/m15000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m15100-pure.cl b/OpenCL/m15100-pure.cl
index 18756cfa4..7b09ec0be 100644
--- a/OpenCL/m15100-pure.cl
+++ b/OpenCL/m15100-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl
index 8d7bdf942..b6abc46d1 100644
--- a/OpenCL/m15300-pure.cl
+++ b/OpenCL/m15300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m15400_a0-optimized.cl b/OpenCL/m15400_a0-optimized.cl
index a24157254..294e086e0 100644
--- a/OpenCL/m15400_a0-optimized.cl
+++ b/OpenCL/m15400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m15400_a1-optimized.cl b/OpenCL/m15400_a1-optimized.cl
index 7508ce935..94de59707 100644
--- a/OpenCL/m15400_a1-optimized.cl
+++ b/OpenCL/m15400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m15400_a3-optimized.cl b/OpenCL/m15400_a3-optimized.cl
index ae57ac965..d2a6d962d 100644
--- a/OpenCL/m15400_a3-optimized.cl
+++ b/OpenCL/m15400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m15500_a0-optimized.cl b/OpenCL/m15500_a0-optimized.cl
index 6644adc51..bc5ae795c 100644
--- a/OpenCL/m15500_a0-optimized.cl
+++ b/OpenCL/m15500_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m15500_a0-pure.cl b/OpenCL/m15500_a0-pure.cl
index eb39484fe..7286c3b64 100644
--- a/OpenCL/m15500_a0-pure.cl
+++ b/OpenCL/m15500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m15500_a1-optimized.cl b/OpenCL/m15500_a1-optimized.cl
index 116bf8485..e64b33a60 100644
--- a/OpenCL/m15500_a1-optimized.cl
+++ b/OpenCL/m15500_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m15500_a1-pure.cl b/OpenCL/m15500_a1-pure.cl
index f6555c5e0..e93c0838e 100644
--- a/OpenCL/m15500_a1-pure.cl
+++ b/OpenCL/m15500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m15500_a3-optimized.cl b/OpenCL/m15500_a3-optimized.cl
index 62c352eea..ad20fcf90 100644
--- a/OpenCL/m15500_a3-optimized.cl
+++ b/OpenCL/m15500_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m15500_a3-pure.cl b/OpenCL/m15500_a3-pure.cl
index 7c35074be..1cecef4b8 100644
--- a/OpenCL/m15500_a3-pure.cl
+++ b/OpenCL/m15500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m15600-pure.cl b/OpenCL/m15600-pure.cl
index 950840f57..05652abf7 100644
--- a/OpenCL/m15600-pure.cl
+++ b/OpenCL/m15600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl
index ac3e849a9..15bc64b58 100644
--- a/OpenCL/m15700-pure.cl
+++ b/OpenCL/m15700-pure.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_sha256.cl"
 #endif
diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl
index 51fdfa0eb..ae5455665 100644
--- a/OpenCL/m15900-pure.cl
+++ b/OpenCL/m15900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl
index cf02c5325..991e033a0 100644
--- a/OpenCL/m16000_a0-pure.cl
+++ b/OpenCL/m16000_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl
index 775be6c57..994e30ace 100644
--- a/OpenCL/m16000_a1-pure.cl
+++ b/OpenCL/m16000_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl
index 23979dded..17361374f 100644
--- a/OpenCL/m16000_a3-pure.cl
+++ b/OpenCL/m16000_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m16100_a0-optimized.cl b/OpenCL/m16100_a0-optimized.cl
index 45b050973..538b15ef3 100644
--- a/OpenCL/m16100_a0-optimized.cl
+++ b/OpenCL/m16100_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m16100_a0-pure.cl b/OpenCL/m16100_a0-pure.cl
index f6aa82060..4ec287d3b 100644
--- a/OpenCL/m16100_a0-pure.cl
+++ b/OpenCL/m16100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16100_a1-optimized.cl b/OpenCL/m16100_a1-optimized.cl
index 2c3cece99..3790ea971 100644
--- a/OpenCL/m16100_a1-optimized.cl
+++ b/OpenCL/m16100_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m16100_a1-pure.cl b/OpenCL/m16100_a1-pure.cl
index 04f5d8a57..5225ba685 100644
--- a/OpenCL/m16100_a1-pure.cl
+++ b/OpenCL/m16100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m16100_a3-optimized.cl b/OpenCL/m16100_a3-optimized.cl
index ba96def6b..38aa22be4 100644
--- a/OpenCL/m16100_a3-optimized.cl
+++ b/OpenCL/m16100_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m16100_a3-pure.cl b/OpenCL/m16100_a3-pure.cl
index 3dfb0cc98..daa0f1de3 100644
--- a/OpenCL/m16100_a3-pure.cl
+++ b/OpenCL/m16100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md5.cl"
 #endif
diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl
index 2b28662f7..eb9b4fc6f 100644
--- a/OpenCL/m16200-pure.cl
+++ b/OpenCL/m16200-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl
index 6a6c2a6a8..fa3d51042 100644
--- a/OpenCL/m16300-pure.cl
+++ b/OpenCL/m16300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16400_a0-optimized.cl b/OpenCL/m16400_a0-optimized.cl
index 520d2b861..049b33b37 100644
--- a/OpenCL/m16400_a0-optimized.cl
+++ b/OpenCL/m16400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m16400_a0-pure.cl b/OpenCL/m16400_a0-pure.cl
index 87ad14899..7857c8e1e 100644
--- a/OpenCL/m16400_a0-pure.cl
+++ b/OpenCL/m16400_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16400_a1-optimized.cl b/OpenCL/m16400_a1-optimized.cl
index 674a6016e..6082d7bc0 100644
--- a/OpenCL/m16400_a1-optimized.cl
+++ b/OpenCL/m16400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_simd.cl"
diff --git a/OpenCL/m16400_a1-pure.cl b/OpenCL/m16400_a1-pure.cl
index 6392f42d1..4f3dcd36c 100644
--- a/OpenCL/m16400_a1-pure.cl
+++ b/OpenCL/m16400_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m16400_a3-optimized.cl b/OpenCL/m16400_a3-optimized.cl
index fd6967267..fa0e00c8b 100644
--- a/OpenCL/m16400_a3-optimized.cl
+++ b/OpenCL/m16400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m16400_a3-pure.cl b/OpenCL/m16400_a3-pure.cl
index 9532b58f4..6d3fef2fa 100644
--- a/OpenCL/m16400_a3-pure.cl
+++ b/OpenCL/m16400_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m16511_a0-pure.cl b/OpenCL/m16511_a0-pure.cl
index cb742d0f6..bb97112a7 100644
--- a/OpenCL/m16511_a0-pure.cl
+++ b/OpenCL/m16511_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16511_a1-pure.cl b/OpenCL/m16511_a1-pure.cl
index 9d566b93c..f875823f0 100644
--- a/OpenCL/m16511_a1-pure.cl
+++ b/OpenCL/m16511_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16511_a3-pure.cl b/OpenCL/m16511_a3-pure.cl
index 08841d710..b86939fea 100644
--- a/OpenCL/m16511_a3-pure.cl
+++ b/OpenCL/m16511_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16512_a0-pure.cl b/OpenCL/m16512_a0-pure.cl
index 121d7c1bf..e92122ec4 100644
--- a/OpenCL/m16512_a0-pure.cl
+++ b/OpenCL/m16512_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16512_a1-pure.cl b/OpenCL/m16512_a1-pure.cl
index 14e88469b..4ef07bb67 100644
--- a/OpenCL/m16512_a1-pure.cl
+++ b/OpenCL/m16512_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m16512_a3-pure.cl b/OpenCL/m16512_a3-pure.cl
index 8b1ff6baf..bae71e6c4 100644
--- a/OpenCL/m16512_a3-pure.cl
+++ b/OpenCL/m16512_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha384.cl"
diff --git a/OpenCL/m16513_a0-pure.cl b/OpenCL/m16513_a0-pure.cl
index 4940297bc..fa8c6cb47 100644
--- a/OpenCL/m16513_a0-pure.cl
+++ b/OpenCL/m16513_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16513_a1-pure.cl b/OpenCL/m16513_a1-pure.cl
index c064f5d45..131931b44 100644
--- a/OpenCL/m16513_a1-pure.cl
+++ b/OpenCL/m16513_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m16513_a3-pure.cl b/OpenCL/m16513_a3-pure.cl
index 476dd6a3d..1f7e3150d 100644
--- a/OpenCL/m16513_a3-pure.cl
+++ b/OpenCL/m16513_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl
index 4422d0e70..425217eb7 100644
--- a/OpenCL/m16600_a0-optimized.cl
+++ b/OpenCL/m16600_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl
index 5c33c9fbd..ed4328db1 100644
--- a/OpenCL/m16600_a0-pure.cl
+++ b/OpenCL/m16600_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl
index c5942f445..fdb694008 100644
--- a/OpenCL/m16600_a1-optimized.cl
+++ b/OpenCL/m16600_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl
index 535916cd1..1ac65adad 100644
--- a/OpenCL/m16600_a1-pure.cl
+++ b/OpenCL/m16600_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl
index 94729cbf9..389124a7c 100644
--- a/OpenCL/m16600_a3-optimized.cl
+++ b/OpenCL/m16600_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl
index a8e3775d8..ddd3d45e3 100644
--- a/OpenCL/m16600_a3-pure.cl
+++ b/OpenCL/m16600_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m16800-pure.cl b/OpenCL/m16800-pure.cl
index f7e3e1f9c..d8c3a26c9 100644
--- a/OpenCL/m16800-pure.cl
+++ b/OpenCL/m16800-pure.cl
@@ -8,12 +8,14 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
 #else
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_simd.h"
 #include "inc_hash_sha1.h"
diff --git a/OpenCL/m16801-pure.cl b/OpenCL/m16801-pure.cl
index f040be21c..0e93e669c 100644
--- a/OpenCL/m16801-pure.cl
+++ b/OpenCL/m16801-pure.cl
@@ -8,12 +8,14 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
 #else
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.h"
 #include "inc_common.h"
 #include "inc_simd.h"
 #include "inc_hash_sha1.h"
diff --git a/OpenCL/m16900-pure.cl b/OpenCL/m16900-pure.cl
index 649a16b2e..f9f2357b9 100644
--- a/OpenCL/m16900-pure.cl
+++ b/OpenCL/m16900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m17300_a0-optimized.cl b/OpenCL/m17300_a0-optimized.cl
index 30076e82a..011de4eb5 100644
--- a/OpenCL/m17300_a0-optimized.cl
+++ b/OpenCL/m17300_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17300_a1-optimized.cl b/OpenCL/m17300_a1-optimized.cl
index 3e0e5970d..d110afb2d 100644
--- a/OpenCL/m17300_a1-optimized.cl
+++ b/OpenCL/m17300_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17300_a3-optimized.cl b/OpenCL/m17300_a3-optimized.cl
index 2948331db..58898f5cb 100644
--- a/OpenCL/m17300_a3-optimized.cl
+++ b/OpenCL/m17300_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17400_a0-optimized.cl b/OpenCL/m17400_a0-optimized.cl
index be8eb241e..a157070f0 100644
--- a/OpenCL/m17400_a0-optimized.cl
+++ b/OpenCL/m17400_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17400_a1-optimized.cl b/OpenCL/m17400_a1-optimized.cl
index 6b3c9c14b..84b14a2b8 100644
--- a/OpenCL/m17400_a1-optimized.cl
+++ b/OpenCL/m17400_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17400_a3-optimized.cl b/OpenCL/m17400_a3-optimized.cl
index 19051f0a2..fd2977b51 100644
--- a/OpenCL/m17400_a3-optimized.cl
+++ b/OpenCL/m17400_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17500_a0-optimized.cl b/OpenCL/m17500_a0-optimized.cl
index 60f807b90..dd62c6956 100644
--- a/OpenCL/m17500_a0-optimized.cl
+++ b/OpenCL/m17500_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17500_a1-optimized.cl b/OpenCL/m17500_a1-optimized.cl
index a402b7afa..e6a04db79 100644
--- a/OpenCL/m17500_a1-optimized.cl
+++ b/OpenCL/m17500_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17500_a3-optimized.cl b/OpenCL/m17500_a3-optimized.cl
index fe6863dbd..9c6a3c99b 100644
--- a/OpenCL/m17500_a3-optimized.cl
+++ b/OpenCL/m17500_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17600_a0-optimized.cl b/OpenCL/m17600_a0-optimized.cl
index 97394ad0b..bf9c3efb6 100644
--- a/OpenCL/m17600_a0-optimized.cl
+++ b/OpenCL/m17600_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17600_a1-optimized.cl b/OpenCL/m17600_a1-optimized.cl
index dac85837d..a605df2a8 100644
--- a/OpenCL/m17600_a1-optimized.cl
+++ b/OpenCL/m17600_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17600_a3-optimized.cl b/OpenCL/m17600_a3-optimized.cl
index 37fef9a7b..aed11f579 100644
--- a/OpenCL/m17600_a3-optimized.cl
+++ b/OpenCL/m17600_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17700_a0-optimized.cl b/OpenCL/m17700_a0-optimized.cl
index cdb6d1ede..3bc5f965f 100644
--- a/OpenCL/m17700_a0-optimized.cl
+++ b/OpenCL/m17700_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17700_a1-optimized.cl b/OpenCL/m17700_a1-optimized.cl
index d3b6d6a14..5dcf2aff7 100644
--- a/OpenCL/m17700_a1-optimized.cl
+++ b/OpenCL/m17700_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17700_a3-optimized.cl b/OpenCL/m17700_a3-optimized.cl
index 7cdd1b22f..bc912a045 100644
--- a/OpenCL/m17700_a3-optimized.cl
+++ b/OpenCL/m17700_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17800_a0-optimized.cl b/OpenCL/m17800_a0-optimized.cl
index b16c36c38..dc4cffadd 100644
--- a/OpenCL/m17800_a0-optimized.cl
+++ b/OpenCL/m17800_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17800_a1-optimized.cl b/OpenCL/m17800_a1-optimized.cl
index 8e7054235..8733184de 100644
--- a/OpenCL/m17800_a1-optimized.cl
+++ b/OpenCL/m17800_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17800_a3-optimized.cl b/OpenCL/m17800_a3-optimized.cl
index f62ab2a0e..f2d497e36 100644
--- a/OpenCL/m17800_a3-optimized.cl
+++ b/OpenCL/m17800_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17900_a0-optimized.cl b/OpenCL/m17900_a0-optimized.cl
index b28cce05b..5ed128657 100644
--- a/OpenCL/m17900_a0-optimized.cl
+++ b/OpenCL/m17900_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m17900_a1-optimized.cl b/OpenCL/m17900_a1-optimized.cl
index a182385f4..d6518aeb8 100644
--- a/OpenCL/m17900_a1-optimized.cl
+++ b/OpenCL/m17900_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m17900_a3-optimized.cl b/OpenCL/m17900_a3-optimized.cl
index afeb00627..43d2791d9 100644
--- a/OpenCL/m17900_a3-optimized.cl
+++ b/OpenCL/m17900_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m18000_a0-optimized.cl b/OpenCL/m18000_a0-optimized.cl
index 3be89d66d..1d587b1db 100644
--- a/OpenCL/m18000_a0-optimized.cl
+++ b/OpenCL/m18000_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m18000_a1-optimized.cl b/OpenCL/m18000_a1-optimized.cl
index 30164893d..1964d037a 100644
--- a/OpenCL/m18000_a1-optimized.cl
+++ b/OpenCL/m18000_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m18000_a3-optimized.cl b/OpenCL/m18000_a3-optimized.cl
index b6f7a33f8..a30de8b7e 100644
--- a/OpenCL/m18000_a3-optimized.cl
+++ b/OpenCL/m18000_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #endif
diff --git a/OpenCL/m18100_a0-pure.cl b/OpenCL/m18100_a0-pure.cl
index d22b3bade..d923a95e8 100644
--- a/OpenCL/m18100_a0-pure.cl
+++ b/OpenCL/m18100_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m18100_a1-pure.cl b/OpenCL/m18100_a1-pure.cl
index 223cbece0..2b170bc4a 100644
--- a/OpenCL/m18100_a1-pure.cl
+++ b/OpenCL/m18100_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m18100_a3-pure.cl b/OpenCL/m18100_a3-pure.cl
index be4805773..ffd2525fe 100644
--- a/OpenCL/m18100_a3-pure.cl
+++ b/OpenCL/m18100_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl
index a520da74d..091be57aa 100644
--- a/OpenCL/m18200_a0-optimized.cl
+++ b/OpenCL/m18200_a0-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl
index 497ac3a3c..878c7b175 100644
--- a/OpenCL/m18200_a0-pure.cl
+++ b/OpenCL/m18200_a0-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl
index 0d9efe343..c9a7e49d3 100644
--- a/OpenCL/m18200_a1-optimized.cl
+++ b/OpenCL/m18200_a1-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl
index 519c8498d..258f3323b 100644
--- a/OpenCL/m18200_a1-pure.cl
+++ b/OpenCL/m18200_a1-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl
index 404c56521..3726bf080 100644
--- a/OpenCL/m18200_a3-optimized.cl
+++ b/OpenCL/m18200_a3-optimized.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md4.cl"
diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl
index 30c298cfd..666774877 100644
--- a/OpenCL/m18200_a3-pure.cl
+++ b/OpenCL/m18200_a3-pure.cl
@@ -9,6 +9,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_hash_md4.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18300-pure.cl b/OpenCL/m18300-pure.cl
index 9e1189d75..26f593a7b 100644
--- a/OpenCL/m18300-pure.cl
+++ b/OpenCL/m18300-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m18400-pure.cl b/OpenCL/m18400-pure.cl
index 3a4a80c27..fe5391fb0 100644
--- a/OpenCL/m18400-pure.cl
+++ b/OpenCL/m18400-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m18500_a0-pure.cl b/OpenCL/m18500_a0-pure.cl
index 7a68132b8..415506b0c 100644
--- a/OpenCL/m18500_a0-pure.cl
+++ b/OpenCL/m18500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m18500_a1-pure.cl b/OpenCL/m18500_a1-pure.cl
index 504d2577e..84c725944 100644
--- a/OpenCL/m18500_a1-pure.cl
+++ b/OpenCL/m18500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m18500_a3-pure.cl b/OpenCL/m18500_a3-pure.cl
index a34072250..d758dc5c9 100644
--- a/OpenCL/m18500_a3-pure.cl
+++ b/OpenCL/m18500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index b939a4381..f68d6f82f 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m18700_a0-optimized.cl b/OpenCL/m18700_a0-optimized.cl
index de8eabbe9..ba5ccff14 100644
--- a/OpenCL/m18700_a0-optimized.cl
+++ b/OpenCL/m18700_a0-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp_optimized.h"
 #include "inc_rp_optimized.cl"
diff --git a/OpenCL/m18700_a0-pure.cl b/OpenCL/m18700_a0-pure.cl
index a7276b9eb..b7ee45dae 100644
--- a/OpenCL/m18700_a0-pure.cl
+++ b/OpenCL/m18700_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m18700_a1-optimized.cl b/OpenCL/m18700_a1-optimized.cl
index 215b0bcfc..18ea7ffbd 100644
--- a/OpenCL/m18700_a1-optimized.cl
+++ b/OpenCL/m18700_a1-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_simd.cl"
diff --git a/OpenCL/m18700_a1-pure.cl b/OpenCL/m18700_a1-pure.cl
index c80a904b7..1f69f8e90 100644
--- a/OpenCL/m18700_a1-pure.cl
+++ b/OpenCL/m18700_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18700_a3-optimized.cl b/OpenCL/m18700_a3-optimized.cl
index 0df5cb0cc..9b09e3308 100644
--- a/OpenCL/m18700_a3-optimized.cl
+++ b/OpenCL/m18700_a3-optimized.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18700_a3-pure.cl b/OpenCL/m18700_a3-pure.cl
index 4f6221724..286656a84 100644
--- a/OpenCL/m18700_a3-pure.cl
+++ b/OpenCL/m18700_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m18800-pure.cl b/OpenCL/m18800-pure.cl
index 8dfd54c30..e237b865a 100644
--- a/OpenCL/m18800-pure.cl
+++ b/OpenCL/m18800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m18900-pure.cl b/OpenCL/m18900-pure.cl
index 980d0bea9..e203d4326 100644
--- a/OpenCL/m18900-pure.cl
+++ b/OpenCL/m18900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19000-pure.cl b/OpenCL/m19000-pure.cl
index 499f5a458..d46951049 100644
--- a/OpenCL/m19000-pure.cl
+++ b/OpenCL/m19000-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_md5.cl"
diff --git a/OpenCL/m19100-pure.cl b/OpenCL/m19100-pure.cl
index 21fc4328e..98a3ae43b 100644
--- a/OpenCL/m19100-pure.cl
+++ b/OpenCL/m19100-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
diff --git a/OpenCL/m19200-pure.cl b/OpenCL/m19200-pure.cl
index 8d40173e7..a77742ae1 100644
--- a/OpenCL/m19200-pure.cl
+++ b/OpenCL/m19200-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m19300_a0-pure.cl b/OpenCL/m19300_a0-pure.cl
index d0e46a0cc..eebb953d6 100644
--- a/OpenCL/m19300_a0-pure.cl
+++ b/OpenCL/m19300_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m19300_a1-pure.cl b/OpenCL/m19300_a1-pure.cl
index ea565a259..0e11ddb18 100644
--- a/OpenCL/m19300_a1-pure.cl
+++ b/OpenCL/m19300_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19300_a3-pure.cl b/OpenCL/m19300_a3-pure.cl
index 4b65dd687..fc715f841 100644
--- a/OpenCL/m19300_a3-pure.cl
+++ b/OpenCL/m19300_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19500_a0-pure.cl b/OpenCL/m19500_a0-pure.cl
index 31f2b56db..665250791 100644
--- a/OpenCL/m19500_a0-pure.cl
+++ b/OpenCL/m19500_a0-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_rp.h"
 #include "inc_rp.cl"
diff --git a/OpenCL/m19500_a1-pure.cl b/OpenCL/m19500_a1-pure.cl
index 00f6bc9e7..2a4ba3d16 100644
--- a/OpenCL/m19500_a1-pure.cl
+++ b/OpenCL/m19500_a1-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19500_a3-pure.cl b/OpenCL/m19500_a3-pure.cl
index 61dce2d47..85b752854 100644
--- a/OpenCL/m19500_a3-pure.cl
+++ b/OpenCL/m19500_a3-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19600-pure.cl b/OpenCL/m19600-pure.cl
index aa19771fe..14c62dd42 100644
--- a/OpenCL/m19600-pure.cl
+++ b/OpenCL/m19600-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19700-pure.cl b/OpenCL/m19700-pure.cl
index efa5493ec..2035901d7 100644
--- a/OpenCL/m19700-pure.cl
+++ b/OpenCL/m19700-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19800-pure.cl b/OpenCL/m19800-pure.cl
index 5c46c3edf..1ec3fb4f0 100644
--- a/OpenCL/m19800-pure.cl
+++ b/OpenCL/m19800-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m19900-pure.cl b/OpenCL/m19900-pure.cl
index b5cfa8bfd..35c0abb22 100644
--- a/OpenCL/m19900-pure.cl
+++ b/OpenCL/m19900-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha1.cl"
diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl
index 4733e9f91..e6136020e 100644
--- a/OpenCL/m20011-pure.cl
+++ b/OpenCL/m20011-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl
index dae331981..0fad4fdbf 100644
--- a/OpenCL/m20012-pure.cl
+++ b/OpenCL/m20012-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl
index fcd2b815a..c711846b5 100644
--- a/OpenCL/m20013-pure.cl
+++ b/OpenCL/m20013-pure.cl
@@ -8,6 +8,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
diff --git a/OpenCL/markov_be.cl b/OpenCL/markov_be.cl
index cdf716ab9..a69e5e691 100644
--- a/OpenCL/markov_be.cl
+++ b/OpenCL/markov_be.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #endif
 
 #define CHARSIZ 256
diff --git a/OpenCL/markov_le.cl b/OpenCL/markov_le.cl
index cb5a91321..f1feb7819 100644
--- a/OpenCL/markov_le.cl
+++ b/OpenCL/markov_le.cl
@@ -6,6 +6,7 @@
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
+#include "inc_platform.cl"
 #endif
 
 #define CHARSIZ 256

From 3b7304c9d8110f1896a39a47f77251e4b6816e2f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 14:01:14 +0200
Subject: [PATCH 06/73] Fix recursion in inc_platform.cl

---
 OpenCL/inc_platform.cl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 76ac96b6c..ceb12a4f1 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -5,7 +5,6 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
-#include "inc_platform.cl"
 #include "inc_platform.h"
 
 #ifdef IS_NATIVE

From d9cb5cf8dfe1cdfbe9075ae337fac0c7ba989398 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 14:03:57 +0200
Subject: [PATCH 07/73] Fix recursion in inc_common.cl

---
 OpenCL/inc_common.cl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index a74fd2a2a..844e4ba12 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -5,7 +5,6 @@
 
 #include "inc_vendor.h"
 #include "inc_types.h"
-#include "inc_platform.cl"
 #include "inc_platform.h"
 #include "inc_common.h"
 

From 6a32e8ef18e01ad062834c72c9f6b96048f97006 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 14:11:13 +0200
Subject: [PATCH 08/73] Fix ulong datatype on Windows x64

---
 OpenCL/inc_types.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 867bb728e..daa93cf2e 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -7,10 +7,11 @@
 #define _INC_TYPES_H
 
 #ifdef IS_CUDA
-typedef unsigned char  uchar;
-typedef unsigned short ushort;
-typedef unsigned int   uint;
-typedef unsigned long  ulong;
+//https://docs.nvidia.com/cuda/nvrtc/index.html#integer-size
+typedef unsigned char      uchar;
+typedef unsigned short     ushort;
+typedef unsigned int       uint;
+typedef unsigned long long ulong;
 #endif
 
 #ifdef KERNEL_STATIC

From 06171958eec6c96b1dcf4abcd25e5b64fa50e4bc Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 15:10:02 +0200
Subject: [PATCH 09/73] Add --gpu-architecture to NVRTC build options

---
 include/types.h |   6 +-
 src/backend.c   | 509 ++++++++++++++++++++++++++----------------------
 2 files changed, 284 insertions(+), 231 deletions(-)

diff --git a/include/types.h b/include/types.h
index 0f0ecf1b4..6e2bfdcac 100644
--- a/include/types.h
+++ b/include/types.h
@@ -995,6 +995,8 @@ typedef struct link_speed
 
 typedef struct hc_device_param
 {
+  CUdevice        device_cuda;
+
   cl_device_id    device;
   cl_device_type  device_type;
 
@@ -1006,8 +1008,8 @@ typedef struct hc_device_param
 
   st_status_t st_status;
 
-  u32     sm_major;
-  u32     sm_minor;
+  int     sm_major;
+  int     sm_minor;
   u32     kernel_exec_timeout;
 
   u8      pcie_bus;
diff --git a/src/backend.c b/src/backend.c
index efb118614..f453bfdda 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -543,6 +543,8 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *profile_d
   snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", profile_dir, attack_kern, device_name_chksum_amp_mp);
 }
 
+// NVRTC
+
 int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -611,227 +613,6 @@ void nvrtc_close (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-int cuda_init (hashcat_ctx_t *hashcat_ctx)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  CUDA_PTR *cuda = backend_ctx->cuda;
-
-  memset (cuda, 0, sizeof (CUDA_PTR));
-
-  #if   defined (_WIN)
-  cuda->lib = hc_dlopen ("cuda");
-  #elif defined (__APPLE__)
-  cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
-  #elif defined (__CYGWIN__)
-  cuda->lib = hc_dlopen ("cuda.dll");
-
-  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("cygcuda-1.dll");
-  #else
-  cuda->lib = hc_dlopen ("libcuda.so");
-
-  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1");
-  #endif
-
-  if (cuda->lib == NULL)
-  {
-    event_log_error (hashcat_ctx, "Cannot find CUDA library.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
-    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
-    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
-    event_log_warning (hashcat_ctx, NULL);
-
-    return -1;
-  }
-
-  HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetCacheConfig,      CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetCurrent,          CUDA_CUCTXGETCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetAttribute,     CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetCount,         CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGet,              CUDA_CUDEVICEGET,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetName,          CUDA_CUDEVICEGETNAME,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceTotalMem,         CUDA_CUDEVICETOTALMEM,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDriverGetVersion,       CUDA_CUDRIVERGETVERSION,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventCreate,            CUDA_CUEVENTCREATE,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventDestroy,           CUDA_CUEVENTDESTROY,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventElapsedTime,       CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventQuery,             CUDA_CUEVENTQUERY,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventRecord,            CUDA_CUEVENTRECORD,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventSynchronize,       CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncGetAttribute,       CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetAttribute,       CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetCacheConfig,     CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuGetErrorName,           CUDA_CUGETERRORNAME,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuGetErrorString,         CUDA_CUGETERRORSTRING,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuInit,                   CUDA_CUINIT,                    CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuLaunchKernel,           CUDA_CULAUNCHKERNEL,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemAlloc,               CUDA_CUMEMALLOC,                CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemAllocHost,           CUDA_CUMEMALLOCHOST,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyDtoD,             CUDA_CUMEMCPYDTOD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyDtoH,             CUDA_CUMEMCPYDTOH,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyHtoD,             CUDA_CUMEMCPYHTOD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemFree,                CUDA_CUMEMFREE,                 CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemFreeHost,            CUDA_CUMEMFREEHOST,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemGetInfo,             CUDA_CUMEMGETINFO,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemsetD32,              CUDA_CUMEMSETD32,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemsetD8,               CUDA_CUMEMSETD8,                CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleGetFunction,      CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleGetGlobal,        CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoad,             CUDA_CUMODULELOAD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoadData,         CUDA_CUMODULELOADDATA,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoadDataEx,       CUDA_CUMODULELOADDATAEX,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleUnload,           CUDA_CUMODULEUNLOAD,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuProfilerStart,          CUDA_CUPROFILERSTART,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuProfilerStop,           CUDA_CUPROFILERSTOP,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamCreate,           CUDA_CUSTREAMCREATE,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamDestroy,          CUDA_CUSTREAMDESTROY,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamSynchronize,      CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamWaitEvent,        CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
-
-  return 0;
-}
-
-void cuda_close (hashcat_ctx_t *hashcat_ctx)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  CUDA_PTR *cuda = backend_ctx->cuda;
-
-  if (cuda)
-  {
-    if (cuda->lib)
-    {
-      hc_dlclose (cuda->lib);
-    }
-
-    hcfree (backend_ctx->cuda);
-
-    backend_ctx->cuda = NULL;
-  }
-}
-
-int ocl_init (hashcat_ctx_t *hashcat_ctx)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  OCL_PTR *ocl = backend_ctx->ocl;
-
-  memset (ocl, 0, sizeof (OCL_PTR));
-
-  #if   defined (_WIN)
-  ocl->lib = hc_dlopen ("OpenCL");
-  #elif defined (__APPLE__)
-  ocl->lib = hc_dlopen ("/System/Library/Frameworks/OpenCL.framework/OpenCL");
-  #elif defined (__CYGWIN__)
-  ocl->lib = hc_dlopen ("opencl.dll");
-
-  if (ocl->lib == NULL) ocl->lib = hc_dlopen ("cygOpenCL-1.dll");
-  #else
-  ocl->lib = hc_dlopen ("libOpenCL.so");
-
-  if (ocl->lib == NULL) ocl->lib = hc_dlopen ("libOpenCL.so.1");
-  #endif
-
-  if (ocl->lib == NULL)
-  {
-    event_log_error (hashcat_ctx, "Cannot find an OpenCL ICD loader library.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the native OpenCL runtime or driver for your platform.");
-    event_log_warning (hashcat_ctx, NULL);
-
-    #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
-    #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
-    #endif
-
-    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
-
-    #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
-    #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
-    #endif
-
-    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
-    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
-    event_log_warning (hashcat_ctx, NULL);
-
-    return -1;
-  }
-
-  HC_LOAD_FUNC (ocl, clBuildProgram,            OCL_CLBUILDPROGRAM,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateBuffer,            OCL_CLCREATEBUFFER,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateCommandQueue,      OCL_CLCREATECOMMANDQUEUE,       OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateContext,           OCL_CLCREATECONTEXT,            OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateKernel,            OCL_CLCREATEKERNEL,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY,  OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE,  OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueCopyBuffer,       OCL_CLENQUEUECOPYBUFFER,        OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueMapBuffer,        OCL_CLENQUEUEMAPBUFFER,         OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueNDRangeKernel,    OCL_CLENQUEUENDRANGEKERNEL,     OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueReadBuffer,       OCL_CLENQUEUEREADBUFFER,        OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueUnmapMemObject,   OCL_CLENQUEUEUNMAPMEMOBJECT,    OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clEnqueueWriteBuffer,      OCL_CLENQUEUEWRITEBUFFER,       OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clFinish,                  OCL_CLFINISH,                   OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clFlush,                   OCL_CLFLUSH,                    OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetDeviceIDs,            OCL_CLGETDEVICEIDS,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetDeviceInfo,           OCL_CLGETDEVICEINFO,            OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetEventInfo,            OCL_CLGETEVENTINFO,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetKernelWorkGroupInfo,  OCL_CLGETKERNELWORKGROUPINFO,   OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetPlatformIDs,          OCL_CLGETPLATFORMIDS,           OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetPlatformInfo,         OCL_CLGETPLATFORMINFO,          OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetProgramBuildInfo,     OCL_CLGETPROGRAMBUILDINFO,      OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetProgramInfo,          OCL_CLGETPROGRAMINFO,           OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseCommandQueue,     OCL_CLRELEASECOMMANDQUEUE,      OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseContext,          OCL_CLRELEASECONTEXT,           OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseKernel,           OCL_CLRELEASEKERNEL,            OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseMemObject,        OCL_CLRELEASEMEMOBJECT,         OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseProgram,          OCL_CLRELEASEPROGRAM,           OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clSetKernelArg,            OCL_CLSETKERNELARG,             OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clWaitForEvents,           OCL_CLWAITFOREVENTS,            OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clGetEventProfilingInfo,   OCL_CLGETEVENTPROFILINGINFO,    OpenCL, 1);
-  HC_LOAD_FUNC (ocl, clReleaseEvent,            OCL_CLRELEASEEVENT,             OpenCL, 1);
-
-  return 0;
-}
-
-void ocl_close (hashcat_ctx_t *hashcat_ctx)
-{
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  OCL_PTR *ocl = backend_ctx->ocl;
-
-  if (ocl)
-  {
-    if (ocl->lib)
-    {
-      hc_dlclose (ocl->lib);
-    }
-
-    hcfree (backend_ctx->ocl);
-
-    backend_ctx->ocl = NULL;
-  }
-}
-
 int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -958,6 +739,258 @@ int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx)
   return 0;
 }
 
+// CUDA
+
+int cuda_init (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  memset (cuda, 0, sizeof (CUDA_PTR));
+
+  #if   defined (_WIN)
+  cuda->lib = hc_dlopen ("cuda");
+  #elif defined (__APPLE__)
+  cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
+  #elif defined (__CYGWIN__)
+  cuda->lib = hc_dlopen ("cuda.dll");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("cygcuda-1.dll");
+  #else
+  cuda->lib = hc_dlopen ("libcuda.so");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1");
+  #endif
+
+  if (cuda->lib == NULL)
+  {
+    event_log_error (hashcat_ctx, "Cannot find CUDA library.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
+    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
+    event_log_warning (hashcat_ctx, NULL);
+
+    return -1;
+  }
+
+  HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetCacheConfig,      CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetCurrent,          CUDA_CUCTXGETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetAttribute,     CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetCount,         CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGet,              CUDA_CUDEVICEGET,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceGetName,          CUDA_CUDEVICEGETNAME,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDeviceTotalMem,         CUDA_CUDEVICETOTALMEM,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuDriverGetVersion,       CUDA_CUDRIVERGETVERSION,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventCreate,            CUDA_CUEVENTCREATE,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventDestroy,           CUDA_CUEVENTDESTROY,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventElapsedTime,       CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventQuery,             CUDA_CUEVENTQUERY,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventRecord,            CUDA_CUEVENTRECORD,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuEventSynchronize,       CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncGetAttribute,       CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetAttribute,       CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetCacheConfig,     CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorName,           CUDA_CUGETERRORNAME,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuGetErrorString,         CUDA_CUGETERRORSTRING,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuInit,                   CUDA_CUINIT,                    CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuLaunchKernel,           CUDA_CULAUNCHKERNEL,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAlloc,               CUDA_CUMEMALLOC,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemAllocHost,           CUDA_CUMEMALLOCHOST,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoD,             CUDA_CUMEMCPYDTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyDtoH,             CUDA_CUMEMCPYDTOH,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemcpyHtoD,             CUDA_CUMEMCPYHTOD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFree,                CUDA_CUMEMFREE,                 CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemFreeHost,            CUDA_CUMEMFREEHOST,             CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemGetInfo,             CUDA_CUMEMGETINFO,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD32,              CUDA_CUMEMSETD32,               CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuMemsetD8,               CUDA_CUMEMSETD8,                CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetFunction,      CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleGetGlobal,        CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoad,             CUDA_CUMODULELOAD,              CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadData,         CUDA_CUMODULELOADDATA,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleLoadDataEx,       CUDA_CUMODULELOADDATAEX,        CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuModuleUnload,           CUDA_CUMODULEUNLOAD,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStart,          CUDA_CUPROFILERSTART,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuProfilerStop,           CUDA_CUPROFILERSTOP,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamCreate,           CUDA_CUSTREAMCREATE,            CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamDestroy,          CUDA_CUSTREAMDESTROY,           CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamSynchronize,      CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuStreamWaitEvent,        CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
+
+  return 0;
+}
+
+void cuda_close (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  if (cuda)
+  {
+    if (cuda->lib)
+    {
+      hc_dlclose (cuda->lib);
+    }
+
+    hcfree (backend_ctx->cuda);
+
+    backend_ctx->cuda = NULL;
+  }
+}
+
+int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceGetAttribute (pi, attrib, dev);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+// OpenCL
+
+int ocl_init (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  OCL_PTR *ocl = backend_ctx->ocl;
+
+  memset (ocl, 0, sizeof (OCL_PTR));
+
+  #if   defined (_WIN)
+  ocl->lib = hc_dlopen ("OpenCL");
+  #elif defined (__APPLE__)
+  ocl->lib = hc_dlopen ("/System/Library/Frameworks/OpenCL.framework/OpenCL");
+  #elif defined (__CYGWIN__)
+  ocl->lib = hc_dlopen ("opencl.dll");
+
+  if (ocl->lib == NULL) ocl->lib = hc_dlopen ("cygOpenCL-1.dll");
+  #else
+  ocl->lib = hc_dlopen ("libOpenCL.so");
+
+  if (ocl->lib == NULL) ocl->lib = hc_dlopen ("libOpenCL.so.1");
+  #endif
+
+  if (ocl->lib == NULL)
+  {
+    event_log_error (hashcat_ctx, "Cannot find an OpenCL ICD loader library.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the native OpenCL runtime or driver for your platform.");
+    event_log_warning (hashcat_ctx, NULL);
+
+    #if defined (__linux__)
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
+    #elif defined (_WIN)
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
+    #endif
+
+    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
+
+    #if defined (__linux__)
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
+    #elif defined (_WIN)
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
+    #endif
+
+    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
+    event_log_warning (hashcat_ctx, NULL);
+
+    return -1;
+  }
+
+  HC_LOAD_FUNC (ocl, clBuildProgram,            OCL_CLBUILDPROGRAM,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateBuffer,            OCL_CLCREATEBUFFER,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateCommandQueue,      OCL_CLCREATECOMMANDQUEUE,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateContext,           OCL_CLCREATECONTEXT,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateKernel,            OCL_CLCREATEKERNEL,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE,  OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueCopyBuffer,       OCL_CLENQUEUECOPYBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueMapBuffer,        OCL_CLENQUEUEMAPBUFFER,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueNDRangeKernel,    OCL_CLENQUEUENDRANGEKERNEL,     OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueReadBuffer,       OCL_CLENQUEUEREADBUFFER,        OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueUnmapMemObject,   OCL_CLENQUEUEUNMAPMEMOBJECT,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clEnqueueWriteBuffer,      OCL_CLENQUEUEWRITEBUFFER,       OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFinish,                  OCL_CLFINISH,                   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clFlush,                   OCL_CLFLUSH,                    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceIDs,            OCL_CLGETDEVICEIDS,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetDeviceInfo,           OCL_CLGETDEVICEINFO,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventInfo,            OCL_CLGETEVENTINFO,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetKernelWorkGroupInfo,  OCL_CLGETKERNELWORKGROUPINFO,   OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformIDs,          OCL_CLGETPLATFORMIDS,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetPlatformInfo,         OCL_CLGETPLATFORMINFO,          OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramBuildInfo,     OCL_CLGETPROGRAMBUILDINFO,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetProgramInfo,          OCL_CLGETPROGRAMINFO,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseCommandQueue,     OCL_CLRELEASECOMMANDQUEUE,      OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseContext,          OCL_CLRELEASECONTEXT,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseKernel,           OCL_CLRELEASEKERNEL,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseMemObject,        OCL_CLRELEASEMEMOBJECT,         OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseProgram,          OCL_CLRELEASEPROGRAM,           OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clSetKernelArg,            OCL_CLSETKERNELARG,             OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clWaitForEvents,           OCL_CLWAITFOREVENTS,            OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clGetEventProfilingInfo,   OCL_CLGETEVENTPROFILINGINFO,    OpenCL, 1);
+  HC_LOAD_FUNC (ocl, clReleaseEvent,            OCL_CLRELEASEEVENT,             OpenCL, 1);
+
+  return 0;
+}
+
+void ocl_close (hashcat_ctx_t *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  OCL_PTR *ocl = backend_ctx->ocl;
+
+  if (ocl)
+  {
+    if (ocl->lib)
+    {
+      hc_dlclose (ocl->lib);
+    }
+
+    hcfree (backend_ctx->ocl);
+
+    backend_ctx->ocl = NULL;
+  }
+}
+
 int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -3571,6 +3604,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     // platform vendor
 
     int CL_rc;
+    int CU_rc;
 
     CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
 
@@ -4150,16 +4184,30 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
           device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
 
-          cl_uint sm_minor = 0;
-          cl_uint sm_major = 0;
+          int sm_minor = 0;
+          int sm_major = 0;
 
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL);
+          //if (backend_ctx->cuda)
+          if (0)
+          {
+            CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device_param->device_cuda);
 
-          if (CL_rc == -1) return -1;
+            if (CU_rc == -1) return -1;
 
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL);
+            CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device_param->device_cuda);
 
-          if (CL_rc == -1) return -1;
+            if (CU_rc == -1) return -1;
+          }
+          else
+          {
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
 
           device_param->sm_minor = sm_minor;
           device_param->sm_major = sm_major;
@@ -5490,13 +5538,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_nvrtcCreateProgram == -1) return -1;
 
-          char **nvrtc_options = (char **) hccalloc (1 + strlen (build_options_module_buf) + 1, sizeof (char *)); // ...
+          char **nvrtc_options = (char **) hccalloc (3 + strlen (build_options_module_buf) + 1, sizeof (char *)); // ...
 
           nvrtc_options[0] = "--device-as-default-execution-space";
+          nvrtc_options[1] = "--gpu-architecture";
+
+          hc_asprintf (&nvrtc_options[2], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
 
           char *nvrtc_options_string = hcstrdup (build_options_module_buf);
 
-          const int num_options = 1 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 1);
+          const int num_options = 3 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 3);
 
           const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options);
 

From 052e42ccefad39e8c7f57b5db3a1514e060f1ad5 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 15:14:48 +0200
Subject: [PATCH 10/73] Fix CUDA_ARCH value

---
 src/backend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index f453bfdda..4cebf4ecb 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5364,9 +5364,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // we don't have sm_* on vendors not NV but it doesn't matter
 
     #if defined (DEBUG)
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #else
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #endif
 
     build_options_buf[build_options_len] = 0;

From 58213c81d685c2d991105befed13f9e007e61ef3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 26 Apr 2019 22:07:56 +0200
Subject: [PATCH 11/73] Add vector datatypes operators

---
 OpenCL/inc_types.h | 657 +++++++++++++++++++++++++++++++++++++++++++++
 src/backend.c      |   7 -
 2 files changed, 657 insertions(+), 7 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index daa93cf2e..77997dd97 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -51,12 +51,669 @@ typedef u8   u8x;
 typedef u16  u16x;
 typedef u32  u32x;
 typedef u64  u64x;
+#else
+#ifdef IS_CUDA
+
+#if VECT_SIZE == 2
+
+class u8x // two-lane vector of u8 (VECT_SIZE == 2); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 / .s1
+  u8 s0;
+  u8 s1;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
+  inline __device__  u8x (const u8 a)             : s0(a), s1(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u8x (void) { }
+  inline __device__ ~u8x (void) { }
+};
+
+class u16x // two-lane vector of u16 (VECT_SIZE == 2); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 / .s1
+  u16 s0;
+  u16 s1;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
+  inline __device__  u16x (const u16 a)              : s0(a), s1(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u16x (void) { }
+  inline __device__ ~u16x (void) { }
+};
+
+class u32x // two-lane vector of u32 (VECT_SIZE == 2); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 / .s1
+  u32 s0;
+  u32 s1;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
+  inline __device__  u32x (const u32 a)              : s0(a), s1(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u32x (void) { }
+  inline __device__ ~u32x (void) { }
+};
+
+class u64x // two-lane vector of u64 (VECT_SIZE == 2); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 / .s1
+  u64 s0;
+  u64 s1;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
+  inline __device__  u64x (const u64 a)              : s0(a), s1(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u64x (void) { }
+  inline __device__ ~u64x (void) { }
+};
+// u32x operators (VECT_SIZE == 2): each works lane by lane on s0/s1; a scalar right-hand operand is applied to every lane. Note that operator != yields one bool that is true only when ALL lanes differ.
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b));    }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;     }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1));  }
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
+
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1); }
+// u64x operators (VECT_SIZE == 2): each works lane by lane on s0/s1; a scalar right-hand operand is applied to every lane. Note that operator != yields one bool that is true only when ALL lanes differ.
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b));    }
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;     }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;     }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;     }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;     }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1;  }
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;     }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1); }
+
+#endif
+
+#if VECT_SIZE == 4
+
+class u8x // four-lane vector of u8 (VECT_SIZE == 4); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s3
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u8x (const u8 a)                                     : s0(a), s1(a), s2(a), s3(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u8x (void) { }
+  inline __device__ ~u8x (void) { }
+};
+
+class u16x // four-lane vector of u16 (VECT_SIZE == 4); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s3
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u16x (const u16 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u16x (void) { }
+  inline __device__ ~u16x (void) { }
+};
+
+class u32x // four-lane vector of u32 (VECT_SIZE == 4); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s3
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u32x (const u32 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u32x (void) { }
+  inline __device__ ~u32x (void) { }
+};
+
+class u64x // four-lane vector of u64 (VECT_SIZE == 4); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s3
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
+  inline __device__  u64x (const u64 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u64x (void) { }
+  inline __device__ ~u64x (void) { }
+};
+// u32x operators (VECT_SIZE == 4): each works lane by lane on s0..s3; a scalar right-hand operand is applied to every lane. Note that operator != yields one bool that is true only when ALL lanes differ.
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
+
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
+// u64x operators (VECT_SIZE == 4): each works lane by lane on s0..s3; a scalar right-hand operand is applied to every lane. Note that operator != yields one bool that is true only when ALL lanes differ.
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
+
+#endif
+
+#if VECT_SIZE == 8
+
+
+class u8x // eight-lane vector of u8 (VECT_SIZE == 8); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s7
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+  u8 s4;
+  u8 s5;
+  u8 s6;
+  u8 s7;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u8x (const u8 a)                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u8x (void) { }
+  inline __device__ ~u8x (void) { }
+};
+
+class u16x // eight-lane vector of u16 (VECT_SIZE == 8); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s7
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+  u16 s4;
+  u16 s5;
+  u16 s6;
+  u16 s7;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u16x (const u16 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u16x (void) { }
+  inline __device__ ~u16x (void) { }
+};
+
+class u32x // eight-lane vector of u32 (VECT_SIZE == 8); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s7
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+  u32 s4;
+  u32 s5;
+  u32 s6;
+  u32 s7;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u32x (const u32 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u32x (void) { }
+  inline __device__ ~u32x (void) { }
+};
+
+class u64x // eight-lane vector of u64 (VECT_SIZE == 8); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s7
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+  u64 s4;
+  u64 s5;
+  u64 s6;
+  u64 s7;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
+  inline __device__  u64x (const u64 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u64x (void) { }
+  inline __device__ ~u64x (void) { }
+};
+// u32x operators (VECT_SIZE == 8): each works lane by lane on s0..s7; a scalar right-hand operand is applied to every lane. operator != yields one bool that is true only when ALL lanes differ. NOTE(review): unlike the 2- and 4-lane sections, no u64x operators follow in this section - confirm whether that is intended.
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)   ); }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
+
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;     }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7;  }
+
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;     }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7;  }
+
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;     }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7;  }
+
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;     }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7;  }
+
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
+
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
+
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7));  }
+
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7));  }
+
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7));  }
+
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7));  }
+
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7));  }
+
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7));  }
+
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7));  }
+
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
+
+#endif
+
+#if VECT_SIZE == 16
+
+class u8x // sixteen-lane vector of u8 (VECT_SIZE == 16); all members are __device__
+{
+  private:
+  public:
+  // lanes; public and accessed directly as .s0 .. .s9, .sa .. .sf
+  u8 s0;
+  u8 s1;
+  u8 s2;
+  u8 s3;
+  u8 s4;
+  u8 s5;
+  u8 s6;
+  u8 s7;
+  u8 s8;
+  u8 s9;
+  u8 sa;
+  u8 sb;
+  u8 sc;
+  u8 sd;
+  u8 se;
+  u8 sf;
+  // per-lane constructor, then broadcast constructor (same value copied into every lane)
+  inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
+  inline __device__  u8x (const u8 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
+  // default constructor does not initialize the lanes; destructor is empty
+  inline __device__  u8x (void) { }
+  inline __device__ ~u8x (void) { }
+};
+
+class u16x
+{
+  private:
+  public:
+  // Sixteen u16 lanes named s0..sf; every member is public.
+  u16 s0;
+  u16 s1;
+  u16 s2;
+  u16 s3;
+  u16 s4;
+  u16 s5;
+  u16 s6;
+  u16 s7;
+  u16 s8;
+  u16 s9;
+  u16 sa;
+  u16 sb;
+  u16 sc;
+  u16 sd;
+  u16 se;
+  u16 sf;
+  // Per-lane constructor (a..p map to s0..sf) and broadcast constructor (a is copied into every lane).
+  inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
+  inline __device__  u16x (const u16 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
+  // The default constructor has an empty body and no initializer list; the destructor body is empty as well.
+  inline __device__  u16x (void) { }
+  inline __device__ ~u16x (void) { }
+};
+
+class u32x
+{
+  private:
+  public:
+  // Sixteen u32 lanes named s0..sf; every member is public.
+  u32 s0;
+  u32 s1;
+  u32 s2;
+  u32 s3;
+  u32 s4;
+  u32 s5;
+  u32 s6;
+  u32 s7;
+  u32 s8;
+  u32 s9;
+  u32 sa;
+  u32 sb;
+  u32 sc;
+  u32 sd;
+  u32 se;
+  u32 sf;
+  // Per-lane constructor (a..p map to s0..sf) and broadcast constructor (a is copied into every lane).
+  inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
+  inline __device__  u32x (const u32 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
+  // The default constructor has an empty body and no initializer list; the destructor body is empty as well.
+  inline __device__  u32x (void) { }
+  inline __device__ ~u32x (void) { }
+};
+
+class u64x
+{
+  private:
+  public:
+  // Sixteen u64 lanes named s0..sf; every member is public.
+  u64 s0;
+  u64 s1;
+  u64 s2;
+  u64 s3;
+  u64 s4;
+  u64 s5;
+  u64 s6;
+  u64 s7;
+  u64 s8;
+  u64 s9;
+  u64 sa;
+  u64 sb;
+  u64 sc;
+  u64 sd;
+  u64 se;
+  u64 sf;
+  // Per-lane constructor (a..p map to s0..sf) and broadcast constructor (a is copied into every lane).
+  inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
+  inline __device__  u64x (const u64 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
+  // The default constructor has an empty body and no initializer list; the destructor body is empty as well.
+  inline __device__  u64x (void) { }
+  inline __device__ ~u64x (void) { }
+};
+// u32x (16 lanes) inequality. NOTE(review): lanes are joined with &&, so the result is true only when EVERY lane differs - confirm callers expect that.
+inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)    && (a.s8 != b)    && (a.s9 != b)    && (a.sa != b)    && (a.sb != b)    && (a.sc != b)    && (a.sd != b)    && (a.se != b)    && (a.sf != b)   ); }
+inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
+// In-place lane-wise XOR: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;    a.s8 ^= b;    a.s9 ^= b;    a.sa ^= b;    a.sb ^= b;    a.sc ^= b;    a.sd ^= b;    a.se ^= b;    a.sf ^= b;    }
+inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
+// In-place lane-wise OR: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;    a.s8 |= b;    a.s9 |= b;    a.sa |= b;    a.sb |= b;    a.sc |= b;    a.sd |= b;    a.se |= b;    a.sf |= b;    }
+inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
+// In-place lane-wise AND: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;    a.s8 &= b;    a.s9 &= b;    a.sa &= b;    a.sb &= b;    a.sc &= b;    a.sd &= b;    a.se &= b;    a.sf &= b;    }
+inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
+// In-place lane-wise addition: a scalar b is added to every lane of a, a vector b lane by lane.
+inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;    a.s8 += b;    a.s9 += b;    a.sa += b;    a.sb += b;    a.sc += b;    a.sd += b;    a.se += b;    a.sf += b;    }
+inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
+// In-place lane-wise subtraction: a scalar b is subtracted from every lane of a, a vector b lane by lane.
+inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
+inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
+// Lane-wise left shift by one scalar count or by per-lane counts; returns a new u32x.
+inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
+inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
+// Lane-wise right shift by one scalar count or by per-lane counts; returns a new u32x.
+inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b),    (a.s8 >> b),    (a.s9 >> b)   , (a.sa >> b),    (a.sb >> b)   , (a.sc >> b),    (a.sd >> b)   , (a.se >> b),    (a.sf >> b)   );  }
+inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf));  }
+// Lane-wise XOR with a scalar or with the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b),    (a.s8 ^  b),    (a.s9 ^  b)   , (a.sa ^  b),    (a.sb ^  b)   , (a.sc ^  b),    (a.sd ^  b)   , (a.se ^  b),    (a.sf ^  b)   );  }
+inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7), (a.s8 ^  b.s8), (a.s9 ^  b.s9), (a.sa ^  b.sa), (a.sb ^  b.sb), (a.sc ^  b.sc), (a.sd ^  b.sd), (a.se ^  b.se), (a.sf ^  b.sf));  }
+// Lane-wise OR with a scalar or with the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b),    (a.s8 |  b),    (a.s9 |  b)   , (a.sa |  b),    (a.sb |  b)   , (a.sc |  b),    (a.sd |  b)   , (a.se |  b),    (a.sf |  b)   );  }
+inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7), (a.s8 |  b.s8), (a.s9 |  b.s9), (a.sa |  b.sa), (a.sb |  b.sb), (a.sc |  b.sc), (a.sd |  b.sd), (a.se |  b.se), (a.sf |  b.sf));  }
+// Lane-wise AND with a scalar or with the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b),    (a.s8 &  b),    (a.s9 &  b)   , (a.sa &  b),    (a.sb &  b)   , (a.sc &  b),    (a.sd &  b)   , (a.se &  b),    (a.sf &  b)   );  }
+inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7), (a.s8 &  b.s8), (a.s9 &  b.s9), (a.sa &  b.sa), (a.sb &  b.sb), (a.sc &  b.sc), (a.sd &  b.sd), (a.se &  b.se), (a.sf &  b.sf));  }
+// Lane-wise addition of a scalar or of the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b),    (a.s8 +  b),    (a.s9 +  b)   , (a.sa +  b),    (a.sb +  b)   , (a.sc +  b),    (a.sd +  b)   , (a.se +  b),    (a.sf +  b)   );  }
+inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7), (a.s8 +  b.s8), (a.s9 +  b.s9), (a.sa +  b.sa), (a.sb +  b.sb), (a.sc +  b.sc), (a.sd +  b.sd), (a.se +  b.se), (a.sf +  b.sf));  }
+// Lane-wise subtraction of a scalar or of the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b),    (a.s8 -  b),    (a.s9 -  b)   , (a.sa -  b),    (a.sb -  b)   , (a.sc -  b),    (a.sd -  b)   , (a.se -  b),    (a.sf -  b)   );  }
+inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7), (a.s8 -  b.s8), (a.s9 -  b.s9), (a.sa -  b.sa), (a.sb -  b.sb), (a.sc -  b.sc), (a.sd -  b.sd), (a.se -  b.se), (a.sf -  b.sf));  }
+// Lane-wise multiplication by a scalar or by the matching lane of a second vector; returns a new u32x.
+inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b),    (a.s8 *  b),    (a.s9 *  b)   , (a.sa *  b),    (a.sb *  b)   , (a.sc *  b),    (a.sd *  b)   , (a.se *  b),    (a.sf *  b)   );  }
+inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7), (a.s8 *  b.s8), (a.s9 *  b.s9), (a.sa *  b.sa), (a.sb *  b.sb), (a.sc *  b.sc), (a.sd *  b.sd), (a.se *  b.se), (a.sf *  b.sf));  }
+// Lane-wise bitwise NOT: returns a new u32x with every lane s0..sf complemented.
+inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
+// u64x (16 lanes) inequality. NOTE(review): lanes are joined with &&, so the result is true only when EVERY lane differs - confirm callers expect that.
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)    && (a.s8 != b)    && (a.s9 != b)    && (a.sa != b)    && (a.sb != b)    && (a.sc != b)    && (a.sd != b)    && (a.se != b)    && (a.sf != b)   ); }
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
+// In-place lane-wise XOR: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;    a.s8 ^= b;    a.s9 ^= b;    a.sa ^= b;    a.sb ^= b;    a.sc ^= b;    a.sd ^= b;    a.se ^= b;    a.sf ^= b;    }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
+// In-place lane-wise OR: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;    a.s8 |= b;    a.s9 |= b;    a.sa |= b;    a.sb |= b;    a.sc |= b;    a.sd |= b;    a.se |= b;    a.sf |= b;    }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
+// In-place lane-wise AND: a scalar b is applied to every lane of a, a vector b lane by lane.
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;    a.s8 &= b;    a.s9 &= b;    a.sa &= b;    a.sb &= b;    a.sc &= b;    a.sd &= b;    a.se &= b;    a.sf &= b;    }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
+// In-place lane-wise addition: a scalar b is added to every lane of a, a vector b lane by lane.
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;    a.s8 += b;    a.s9 += b;    a.sa += b;    a.sb += b;    a.sc += b;    a.sd += b;    a.se += b;    a.sf += b;    }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
+// In-place lane-wise subtraction: a scalar b is subtracted from every lane of a, a vector b lane by lane.
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
+// Lane-wise left shift by one scalar count or by per-lane counts; returns a new u64x.
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
+// Lane-wise right shift by one scalar count or by per-lane counts; returns a new u64x.
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b),    (a.s8 >> b),    (a.s9 >> b)   , (a.sa >> b),    (a.sb >> b)   , (a.sc >> b),    (a.sd >> b)   , (a.se >> b),    (a.sf >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf));  }
+// Lane-wise XOR with a scalar or with the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b),    (a.s8 ^  b),    (a.s9 ^  b)   , (a.sa ^  b),    (a.sb ^  b)   , (a.sc ^  b),    (a.sd ^  b)   , (a.se ^  b),    (a.sf ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7), (a.s8 ^  b.s8), (a.s9 ^  b.s9), (a.sa ^  b.sa), (a.sb ^  b.sb), (a.sc ^  b.sc), (a.sd ^  b.sd), (a.se ^  b.se), (a.sf ^  b.sf));  }
+// Lane-wise OR with a scalar or with the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b),    (a.s8 |  b),    (a.s9 |  b)   , (a.sa |  b),    (a.sb |  b)   , (a.sc |  b),    (a.sd |  b)   , (a.se |  b),    (a.sf |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7), (a.s8 |  b.s8), (a.s9 |  b.s9), (a.sa |  b.sa), (a.sb |  b.sb), (a.sc |  b.sc), (a.sd |  b.sd), (a.se |  b.se), (a.sf |  b.sf));  }
+// Lane-wise AND with a scalar or with the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b),    (a.s8 &  b),    (a.s9 &  b)   , (a.sa &  b),    (a.sb &  b)   , (a.sc &  b),    (a.sd &  b)   , (a.se &  b),    (a.sf &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7), (a.s8 &  b.s8), (a.s9 &  b.s9), (a.sa &  b.sa), (a.sb &  b.sb), (a.sc &  b.sc), (a.sd &  b.sd), (a.se &  b.se), (a.sf &  b.sf));  }
+// Lane-wise addition of a scalar or of the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b),    (a.s8 +  b),    (a.s9 +  b)   , (a.sa +  b),    (a.sb +  b)   , (a.sc +  b),    (a.sd +  b)   , (a.se +  b),    (a.sf +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7), (a.s8 +  b.s8), (a.s9 +  b.s9), (a.sa +  b.sa), (a.sb +  b.sb), (a.sc +  b.sc), (a.sd +  b.sd), (a.se +  b.se), (a.sf +  b.sf));  }
+// Lane-wise subtraction of a scalar or of the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b),    (a.s8 -  b),    (a.s9 -  b)   , (a.sa -  b),    (a.sb -  b)   , (a.sc -  b),    (a.sd -  b)   , (a.se -  b),    (a.sf -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7), (a.s8 -  b.s8), (a.s9 -  b.s9), (a.sa -  b.sa), (a.sb -  b.sb), (a.sc -  b.sc), (a.sd -  b.sd), (a.se -  b.se), (a.sf -  b.sf));  }
+// Lane-wise multiplication by a scalar or by the matching lane of a second vector; returns a new u64x.
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b),    (a.s8 *  b),    (a.s9 *  b)   , (a.sa *  b),    (a.sb *  b)   , (a.sc *  b),    (a.sd *  b)   , (a.se *  b),    (a.sf *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7), (a.s8 *  b.s8), (a.s9 *  b.s9), (a.sa *  b.sa), (a.sb *  b.sb), (a.sc *  b.sc), (a.sd *  b.sd), (a.se *  b.se), (a.sf *  b.sf));  }
+// Lane-wise bitwise NOT: returns a new u64x with every lane s0..sf complemented.
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
+
+#endif
+
 #else
 typedef VTYPE(uchar,  VECT_SIZE)  u8x;
 typedef VTYPE(ushort, VECT_SIZE) u16x;
 typedef VTYPE(uint,   VECT_SIZE) u32x;
 typedef VTYPE(ulong,  VECT_SIZE) u64x;
 #endif
+#endif
 
 // unions
 
diff --git a/src/backend.c b/src/backend.c
index 4cebf4ecb..80cb093e1 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5085,13 +5085,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (vector_width > 16) vector_width = 16;
 
-    // CUDA currently support only scalar types
-
-    if (backend_ctx->cuda)
-    {
-      vector_width = 1;
-    }
-
     device_param->vector_width = vector_width;
 
     /**

From a415422123895d75aa3b4eea1995204f7d25394b Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 28 Apr 2019 14:45:50 +0200
Subject: [PATCH 12/73] Initialize CUDA devices and some first attribute
 queries

---
 include/backend.h |    6 +
 include/types.h   |   12 +
 src/backend.c     | 1873 +++++++++++++++++++++++++--------------------
 3 files changed, 1058 insertions(+), 833 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index 7141288fe..d879386aa 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -39,6 +39,12 @@ int hc_nvrtcGetProgramLog        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog,
 int hc_nvrtcGetPTXSize           (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
 int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
 
+int hc_cuInit                    (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
+int hc_cuDeviceGetAttribute      (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
+int hc_cuDeviceGetCount          (hashcat_ctx_t *hashcat_ctx, int *count);
+int hc_cuDeviceGet               (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
+int hc_cuDeviceGetName           (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
+
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
 int hc_clCreateCommandQueue      (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_command_queue *command_queue);
diff --git a/include/types.h b/include/types.h
index 6e2bfdcac..954b1ee2a 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1340,6 +1340,18 @@ typedef struct backend_ctx
   void               *cuda;
   void               *nvrtc;
 
+  int                *backend_device_from_cuda;   // from cuda device index to backend device index
+  int                *backend_device_to_cuda;     // from backend device index to cuda device index
+  int                *backend_device_from_opencl; // from opencl device index to backend device index
+  int                *backend_device_to_opencl;   // from backend device index to opencl device index
+
+  int                 backend_devices_cnt;
+  int                 backend_devices_active;
+  int                 cuda_devices_cnt;
+  int                 cuda_devices_active;
+  int                 opencl_devices_cnt;
+  int                 opencl_devices_active;
+
   cl_uint             platforms_cnt;
   cl_platform_id     *platforms;
   char              **platforms_vendor;
diff --git a/src/backend.c b/src/backend.c
index 80cb093e1..476d47c22 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -851,6 +851,33 @@ void cuda_close (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
+int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags)
+{
+  // Calls the mapped cuInit (Flags). Returns 0 on success, -1 on failure (a driver error is logged).
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+  if (cuda == NULL) return -1; // no CUDA function table available: fail instead of dereferencing NULL
+
+  const CUresult CU_err = cuda->cuInit (Flags);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuInit(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuInit(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -878,6 +905,88 @@ int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attri
   return 0;
 }
 
+int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count)
+{
+  // Forwards count to the mapped cuDeviceGetCount (). Returns 0 on success; logs and returns -1 otherwise.
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuDeviceGetCount (count);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+
+    // Fall back to the numeric result code when no error string can be resolved.
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetCount(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetCount(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice* device, int ordinal)
+{
+  // Forwards device and ordinal to the mapped cuDeviceGet (). Returns 0 on success; logs and returns -1 otherwise.
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuDeviceGet (device, ordinal);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+
+    // Fall back to the numeric result code when no error string can be resolved.
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGet(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGet(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev)
+{
+  // Forwards name, len and dev to the mapped cuDeviceGetName (). Returns 0 on success; logs and returns -1 otherwise.
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuDeviceGetName (name, len, dev);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+
+    // Fall back to the numeric result code when no error string can be resolved.
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetName(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetName(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -3286,7 +3395,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->devices_param = devices_param;
 
   /**
-   * Load and map CUDA library calls
+   * Load and map CUDA library calls, then init CUDA
    */
 
   CUDA_PTR *cuda = (CUDA_PTR *) hcmalloc (sizeof (CUDA_PTR));
@@ -3300,6 +3409,13 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     cuda_close (hashcat_ctx);
   }
 
+  const int rc_cuInit = hc_cuInit (hashcat_ctx, 0);
+
+  if (rc_cuInit == -1)
+  {
+    cuda_close (hashcat_ctx);
+  }
+
   /**
    * Load and map NVRTC library calls
    */
@@ -3393,6 +3509,29 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->device_types_filter = device_types_filter;
 
+  /**
+   * Backend structures
+   */
+
+  #define FREE_BACKEND_CTX_ON_ERROR       \
+  {                                       \
+    hcfree (backend_device_from_cuda);    \
+    hcfree (backend_device_to_cuda);      \
+    hcfree (backend_device_from_opencl);  \
+    hcfree (backend_device_to_opencl);    \
+    hcfree (platforms_vendor);            \
+    hcfree (platforms_name);              \
+    hcfree (platforms_version);           \
+    hcfree (platforms_skipped);           \
+    hcfree (platforms);                   \
+    hcfree (platform_devices);            \
+  }
+
+  int *backend_device_from_cuda   = (int *) hccalloc (DEVICES_MAX, sizeof (int));
+  int *backend_device_to_cuda     = (int *) hccalloc (DEVICES_MAX, sizeof (int));
+  int *backend_device_from_opencl = (int *) hccalloc (DEVICES_MAX, sizeof (int));
+  int *backend_device_to_opencl   = (int *) hccalloc (DEVICES_MAX, sizeof (int));
+
   /**
    * OpenCL platforms: detect
    */
@@ -3408,19 +3547,9 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   int CL_rc = hc_clGetPlatformIDs (hashcat_ctx, CL_PLATFORMS_MAX, platforms, &platforms_cnt);
 
-  #define FREE_OPENCL_CTX_ON_ERROR \
-  {                                \
-      hcfree (platforms_vendor);   \
-      hcfree (platforms_name);     \
-      hcfree (platforms_version);  \
-      hcfree (platforms_skipped);  \
-      hcfree (platforms);          \
-      hcfree (platform_devices);   \
-  }
-
   if (CL_rc == -1)
   {
-    FREE_OPENCL_CTX_ON_ERROR;
+    FREE_BACKEND_CTX_ON_ERROR;
 
     return -1;
   }
@@ -3456,7 +3585,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
 
-    FREE_OPENCL_CTX_ON_ERROR;
+    FREE_BACKEND_CTX_ON_ERROR;
 
     return -1;
   }
@@ -3470,7 +3599,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
       event_log_error (hashcat_ctx, "An invalid platform was specified using the --opencl-platforms parameter.");
       event_log_error (hashcat_ctx, "The specified platform was higher than the number of available platforms (%u).", platforms_cnt);
 
-      FREE_OPENCL_CTX_ON_ERROR;
+      FREE_BACKEND_CTX_ON_ERROR;
 
       return -1;
     }
@@ -3505,7 +3634,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1)
         {
-          FREE_OPENCL_CTX_ON_ERROR;
+          FREE_BACKEND_CTX_ON_ERROR;
 
           return -1;
         }
@@ -3537,6 +3666,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->enabled = true;
 
+  backend_ctx->backend_device_from_cuda   = backend_device_from_cuda;
+  backend_ctx->backend_device_to_cuda     = backend_device_to_cuda;
+  backend_ctx->backend_device_from_opencl = backend_device_from_opencl;
+  backend_ctx->backend_device_to_opencl   = backend_device_to_opencl;
+
   backend_ctx->platforms_vendor      = platforms_vendor;
   backend_ctx->platforms_name        = platforms_name;
   backend_ctx->platforms_version     = platforms_version;
@@ -3546,6 +3680,8 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->platform_devices_cnt  = platform_devices_cnt;
   backend_ctx->platform_devices      = platform_devices;
 
+  #undef FREE_BACKEND_CTX_ON_ERROR
+
   return 0;
 }
 
@@ -3555,11 +3691,17 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  cuda_close (hashcat_ctx);
-  ocl_close  (hashcat_ctx);
+  nvrtc_close (hashcat_ctx);
+  cuda_close  (hashcat_ctx);
+  ocl_close   (hashcat_ctx);
 
   hcfree (backend_ctx->devices_param);
 
+  hcfree (backend_ctx->backend_device_from_cuda);
+  hcfree (backend_ctx->backend_device_to_cuda);
+  hcfree (backend_ctx->backend_device_from_opencl);
+  hcfree (backend_ctx->backend_device_to_opencl);
+
   hcfree (backend_ctx->platforms);
   hcfree (backend_ctx->platform_devices);
   hcfree (backend_ctx->platforms_vendor);
@@ -3577,629 +3719,694 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   if (backend_ctx->enabled == false) return 0;
 
-  /**
-   * OpenCL devices: simply push all devices from all platforms into the same device array
-   */
-
-  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
-  cl_platform_id *platforms             = backend_ctx->platforms;
-  cl_uint         platform_devices_cnt  = backend_ctx->platform_devices_cnt;
-  cl_device_id   *platform_devices      = backend_ctx->platform_devices;
-
   bool need_adl     = false;
   bool need_nvml    = false;
   bool need_nvapi   = false;
   bool need_sysfs   = false;
 
+  int backend_devices_idx    = 0;
+  int backend_devices_cnt    = 0;
+  int backend_devices_active = 0;
+
+  if (backend_ctx->cuda)
+  {
+    int cuda_devices_cnt = 0;
+
+    const int rc_cuDeviceGetCount = hc_cuDeviceGetCount (hashcat_ctx, &cuda_devices_cnt);
+
+    if (rc_cuDeviceGetCount == -1)
+    {
+      cuda_close (hashcat_ctx);
+    }
+
+    backend_ctx->cuda_devices_cnt = cuda_devices_cnt;
+
+    backend_devices_cnt += cuda_devices_cnt;
+
+    hc_device_param_t *devices_param = backend_ctx->devices_param;
+
+    for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++, backend_devices_idx++)
+    {
+      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+
+      backend_ctx->backend_device_from_cuda[cuda_devices_idx]  = backend_devices_idx;
+      backend_ctx->backend_device_to_cuda[backend_devices_idx] = cuda_devices_idx;
+
+      CUdevice device_cuda;
+
+      int CU_rc;
+
+      CU_rc = hc_cuDeviceGet (hashcat_ctx, &device_cuda, cuda_devices_idx);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_cuda = device_cuda;
+
+      // device_name
+
+      char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY);
+
+      CU_rc = hc_cuDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_name = device_name;
+
+      hc_string_trim_leading (device_name);
+
+      hc_string_trim_trailing (device_name);
+
+      // sm_minor, sm_major
+
+      int sm_major = 0;
+      int sm_minor = 0;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->sm_major = sm_major;
+      device_param->sm_minor = sm_minor;
+
+
+    printf ("%s %d %d\n", device_name, sm_major, sm_minor);
+
+    }
+  }
+
+  backend_ctx->backend_devices_cnt    = backend_devices_cnt;
+  backend_ctx->backend_devices_active = backend_devices_active;
+
   u32 devices_cnt = 0;
 
   u32 devices_active = 0;
 
-  for (u32 platform_id = 0; platform_id < platforms_cnt; platform_id++)
+  if (backend_ctx->ocl)
   {
-    size_t param_value_size = 0;
+    /**
+     * OpenCL devices: simply push all devices from all platforms into the same device array
+     */
 
-    cl_platform_id platform = platforms[platform_id];
+    cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
+    cl_platform_id *platforms             = backend_ctx->platforms;
+    cl_uint         platform_devices_cnt  = backend_ctx->platform_devices_cnt;
+    cl_device_id   *platform_devices      = backend_ctx->platform_devices;
 
-    // platform vendor
-
-    int CL_rc;
-    int CU_rc;
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
-
-    if (CL_rc == -1) return -1;
-
-    char *platform_vendor = (char *) hcmalloc (param_value_size);
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, param_value_size, platform_vendor, NULL);
-
-    if (CL_rc == -1) return -1;
-
-    backend_ctx->platforms_vendor[platform_id] = platform_vendor;
-
-    // platform name
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
-
-    if (CL_rc == -1) return -1;
-
-    char *platform_name = (char *) hcmalloc (param_value_size);
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, param_value_size, platform_name, NULL);
-
-    if (CL_rc == -1) return -1;
-
-    backend_ctx->platforms_name[platform_id] = platform_name;
-
-    // platform version
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
-
-    if (CL_rc == -1) return -1;
-
-    char *platform_version = (char *) hcmalloc (param_value_size);
-
-    CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, param_value_size, platform_version, NULL);
-
-    if (CL_rc == -1) return -1;
-
-    backend_ctx->platforms_version[platform_id] = platform_version;
-
-    // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
-    // this causes trouble with vendor id based macros
-    // we'll assign generic to those without special optimization available
-
-    cl_uint platform_vendor_id = 0;
-
-    if (strcmp (platform_vendor, CL_VENDOR_AMD1) == 0)
+    for (u32 platform_id = 0; platform_id < platforms_cnt; platform_id++)
     {
-      platform_vendor_id = VENDOR_ID_AMD;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_AMD2) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_AMD;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_APPLE;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_INTEL_SDK;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_MESA;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_NV;
-    }
-    else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
-    {
-      platform_vendor_id = VENDOR_ID_POCL;
-    }
-    else
-    {
-      platform_vendor_id = VENDOR_ID_GENERIC;
-    }
+      size_t param_value_size = 0;
 
-    bool platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
+      cl_platform_id platform = platforms[platform_id];
 
-    CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
+      // platform vendor
 
-    if (CL_rc == -1)
-    {
-      //event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
+      int CL_rc;
 
-      //return -1;
-
-      platform_skipped = true;
-    }
-
-    backend_ctx->platforms_skipped[platform_id] = platform_skipped;
-
-    if (platform_skipped == true) continue;
-
-    if (user_options->force == false)
-    {
-      if (platform_vendor_id == VENDOR_ID_MESA)
-      {
-        event_log_error (hashcat_ctx, "Mesa (Gallium) OpenCL platform detected!");
-
-        event_log_warning (hashcat_ctx, "The Mesa platform can cause errors that are often mistaken for bugs in hashcat.");
-        event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the drivers listed in docs/readme.txt.");
-        event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
-        event_log_warning (hashcat_ctx, "You can also use --opencl-platforms to skip the Mesa platform(s).");
-        event_log_warning (hashcat_ctx, NULL);
-
-        return -1;
-      }
-    }
-
-    hc_device_param_t *devices_param = backend_ctx->devices_param;
-
-    for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
-    {
-      const u32 device_id = devices_cnt;
-
-      hc_device_param_t *device_param = &devices_param[device_id];
-
-      device_param->platform_vendor_id = platform_vendor_id;
-
-      device_param->device = platform_devices[platform_devices_id];
-
-      device_param->device_id = device_id;
-
-      device_param->platform_devices_id = platform_devices_id;
-
-      device_param->platform = platform;
-
-      // device_type
-
-      cl_device_type device_type;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TYPE, sizeof (device_type), &device_type, NULL);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      device_type &= ~CL_DEVICE_TYPE_DEFAULT;
+      char *platform_vendor = (char *) hcmalloc (param_value_size);
 
-      device_param->device_type = device_type;
-
-      // device_name
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, 0, NULL, &param_value_size);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, param_value_size, platform_vendor, NULL);
 
       if (CL_rc == -1) return -1;
 
-      char *device_name = (char *) hcmalloc (param_value_size);
+      backend_ctx->platforms_vendor[platform_id] = platform_vendor;
 
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, param_value_size, device_name, NULL);
+      // platform name
+
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      device_param->device_name = device_name;
+      char *platform_name = (char *) hcmalloc (param_value_size);
 
-      hc_string_trim_leading (device_param->device_name);
-
-      hc_string_trim_trailing (device_param->device_name);
-
-      // device_vendor
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, 0, NULL, &param_value_size);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, param_value_size, platform_name, NULL);
 
       if (CL_rc == -1) return -1;
 
-      char *device_vendor = (char *) hcmalloc (param_value_size);
+      backend_ctx->platforms_name[platform_id] = platform_name;
 
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, param_value_size, device_vendor, NULL);
+      // platform version
+
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      device_param->device_vendor = device_vendor;
+      char *platform_version = (char *) hcmalloc (param_value_size);
 
-      cl_uint device_vendor_id = 0;
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, param_value_size, platform_version, NULL);
 
-      if (strcmp (device_vendor, CL_VENDOR_AMD1) == 0)
+      if (CL_rc == -1) return -1;
+
+      backend_ctx->platforms_version[platform_id] = platform_version;
+
+      // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
+      // this causes trouble with vendor id based macros
+      // we'll assign generic to those without special optimization available
+
+      cl_uint platform_vendor_id = 0;
+
+      if (strcmp (platform_vendor, CL_VENDOR_AMD1) == 0)
       {
-        device_vendor_id = VENDOR_ID_AMD;
+        platform_vendor_id = VENDOR_ID_AMD;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_AMD2) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_AMD2) == 0)
       {
-        device_vendor_id = VENDOR_ID_AMD;
+        platform_vendor_id = VENDOR_ID_AMD;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
       {
-        device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+        platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_APPLE) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
       {
-        device_vendor_id = VENDOR_ID_APPLE;
+        platform_vendor_id = VENDOR_ID_APPLE;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
       {
-        device_vendor_id = VENDOR_ID_AMD;
+        platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_NV) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
       {
-        device_vendor_id = VENDOR_ID_NV;
+        platform_vendor_id = VENDOR_ID_INTEL_SDK;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
       {
-        device_vendor_id = VENDOR_ID_INTEL_SDK;
+        platform_vendor_id = VENDOR_ID_MESA;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
       {
-        device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        platform_vendor_id = VENDOR_ID_NV;
       }
-      else if (strcmp (device_vendor, CL_VENDOR_INTEL_SDK) == 0)
+      else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
       {
-        device_vendor_id = VENDOR_ID_INTEL_SDK;
-      }
-      else if (strcmp (device_vendor, CL_VENDOR_MESA) == 0)
-      {
-        device_vendor_id = VENDOR_ID_MESA;
-      }
-      else if (strcmp (device_vendor, CL_VENDOR_NV) == 0)
-      {
-        device_vendor_id = VENDOR_ID_NV;
-      }
-      else if (strcmp (device_vendor, CL_VENDOR_POCL) == 0)
-      {
-        device_vendor_id = VENDOR_ID_POCL;
+        platform_vendor_id = VENDOR_ID_POCL;
       }
       else
       {
-        device_vendor_id = VENDOR_ID_GENERIC;
+        platform_vendor_id = VENDOR_ID_GENERIC;
       }
 
-      device_param->device_vendor_id = device_vendor_id;
+      bool platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
 
-      // device_version
+      CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
 
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *device_version = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, param_value_size, device_version, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_version = device_version;
-
-      // device_opencl_version
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *device_opencl_version = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, device_opencl_version, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_opencl_version = device_opencl_version;
-
-      // max_compute_units
-
-      cl_uint device_processors;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_processors = device_processors;
-
-      // device_global_mem
-
-      cl_ulong device_global_mem;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_global_mem = device_global_mem;
-
-      device_param->device_available_mem = 0;
-
-      // device_maxmem_alloc
-
-      cl_ulong device_maxmem_alloc;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_maxmem_alloc = device_maxmem_alloc;
-
-      // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
-      // testwise disabling that
-      //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
-
-      // max_work_group_size
-
-      size_t device_maxworkgroup_size;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_maxworkgroup_size = device_maxworkgroup_size;
-
-      // max_clock_frequency
-
-      cl_uint device_maxclock_frequency;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_maxclock_frequency = device_maxclock_frequency;
-
-      // device_endian_little
-
-      cl_bool device_endian_little;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (device_endian_little == CL_FALSE)
+      if (CL_rc == -1)
       {
-        event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1);
+        //event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
 
-        device_param->skipped = true;
+        //return -1;
+
+        platform_skipped = true;
       }
 
-      // device_available
+      backend_ctx->platforms_skipped[platform_id] = platform_skipped;
 
-      cl_bool device_available;
+      if (platform_skipped == true) continue;
 
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (device_available == CL_FALSE)
+      if (user_options->force == false)
       {
-        event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      // device_compiler_available
-
-      cl_bool device_compiler_available;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (device_compiler_available == CL_FALSE)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      // device_execution_capabilities
-
-      cl_device_exec_capabilities device_execution_capabilities;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      // device_extensions
-
-      size_t device_extensions_size;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *device_extensions = hcmalloc (device_extensions_size + 1);
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (strstr (device_extensions, "base_atomics") == 0)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      if (strstr (device_extensions, "byte_addressable_store") == 0)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      hcfree (device_extensions);
-
-      // device_max_constant_buffer_size
-
-      cl_ulong device_max_constant_buffer_size;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (device_max_constant_buffer_size < 65536)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      // device_local_mem_size
-
-      cl_ulong device_local_mem_size;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      if (device_local_mem_size < 32768)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
-
-        device_param->skipped = true;
-      }
-
-      device_param->device_local_mem_size = device_local_mem_size;
-
-      // device_local_mem_type
-
-      cl_device_local_mem_type device_local_mem_type;
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->device_local_mem_type = device_local_mem_type;
-
-      // If there's both an Intel CPU and an AMD OpenCL runtime it's a tricky situation
-      // Both platforms support CPU device types and therefore both will try to use 100% of the physical resources
-      // This results in both utilizing it for 50%
-      // However, Intel has much better SIMD control over their own hardware
-      // It makes sense to give them full control over their own hardware
-
-      if (device_type & CL_DEVICE_TYPE_CPU)
-      {
-        if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
+        if (platform_vendor_id == VENDOR_ID_MESA)
         {
-          if (user_options->force == false)
-          {
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", device_id + 1);
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
+          event_log_error (hashcat_ctx, "Mesa (Gallium) OpenCL platform detected!");
 
-            device_param->skipped = true;
+          event_log_warning (hashcat_ctx, "The Mesa platform can cause errors that are often mistaken for bugs in hashcat.");
+          event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the drivers listed in docs/readme.txt.");
+          event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
+          event_log_warning (hashcat_ctx, "You can also use --opencl-platforms to skip the Mesa platform(s).");
+          event_log_warning (hashcat_ctx, NULL);
+
+          return -1;
+        }
+      }
+
+      hc_device_param_t *devices_param = backend_ctx->devices_param;
+
+      for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
+      {
+        const u32 device_id = devices_cnt;
+
+        hc_device_param_t *device_param = &devices_param[device_id];
+
+        device_param->platform_vendor_id = platform_vendor_id;
+
+        device_param->device = platform_devices[platform_devices_id];
+
+        device_param->device_id = device_id;
+
+        device_param->platform_devices_id = platform_devices_id;
+
+        device_param->platform = platform;
+
+        // device_type
+
+        cl_device_type device_type;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TYPE, sizeof (device_type), &device_type, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_type &= ~CL_DEVICE_TYPE_DEFAULT;
+
+        device_param->device_type = device_type;
+
+        // device_name
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *device_name = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, param_value_size, device_name, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_name = device_name;
+
+        hc_string_trim_leading (device_param->device_name);
+
+        hc_string_trim_trailing (device_param->device_name);
+
+        // device_vendor
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *device_vendor = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, param_value_size, device_vendor, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_vendor = device_vendor;
+
+        cl_uint device_vendor_id = 0;
+
+        if (strcmp (device_vendor, CL_VENDOR_AMD1) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_AMD2) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_APPLE) == 0)
+        {
+          device_vendor_id = VENDOR_ID_APPLE;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0)
+        {
+          device_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_NV) == 0)
+        {
+          device_vendor_id = VENDOR_ID_NV;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0)
+        {
+          device_vendor_id = VENDOR_ID_INTEL_SDK;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+        {
+          device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_INTEL_SDK) == 0)
+        {
+          device_vendor_id = VENDOR_ID_INTEL_SDK;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_MESA) == 0)
+        {
+          device_vendor_id = VENDOR_ID_MESA;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_NV) == 0)
+        {
+          device_vendor_id = VENDOR_ID_NV;
+        }
+        else if (strcmp (device_vendor, CL_VENDOR_POCL) == 0)
+        {
+          device_vendor_id = VENDOR_ID_POCL;
+        }
+        else
+        {
+          device_vendor_id = VENDOR_ID_GENERIC;
+        }
+
+        device_param->device_vendor_id = device_vendor_id;
+
+        // device_version
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *device_version = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, param_value_size, device_version, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_version = device_version;
+
+        // device_opencl_version
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *device_opencl_version = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, device_opencl_version, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_opencl_version = device_opencl_version;
+
+        // max_compute_units
+
+        cl_uint device_processors;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_processors = device_processors;
+
+        // device_global_mem
+
+        cl_ulong device_global_mem;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_global_mem = device_global_mem;
+
+        device_param->device_available_mem = 0;
+
+        // device_maxmem_alloc
+
+        cl_ulong device_maxmem_alloc;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_maxmem_alloc = device_maxmem_alloc;
+
+        // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
+        // testwise disabling that
+        //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
+
+        // max_work_group_size
+
+        size_t device_maxworkgroup_size;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_maxworkgroup_size = device_maxworkgroup_size;
+
+        // max_clock_frequency
+
+        cl_uint device_maxclock_frequency;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_maxclock_frequency = device_maxclock_frequency;
+
+        // device_endian_little
+
+        cl_bool device_endian_little;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (device_endian_little == CL_FALSE)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        // device_available
+
+        cl_bool device_available;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (device_available == CL_FALSE)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        // device_compiler_available
+
+        cl_bool device_compiler_available;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (device_compiler_available == CL_FALSE)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        // device_execution_capabilities
+
+        cl_device_exec_capabilities device_execution_capabilities;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        // device_extensions
+
+        size_t device_extensions_size;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *device_extensions = hcmalloc (device_extensions_size + 1);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (strstr (device_extensions, "base_atomics") == 0)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        if (strstr (device_extensions, "byte_addressable_store") == 0)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        hcfree (device_extensions);
+
+        // device_max_constant_buffer_size
+
+        cl_ulong device_max_constant_buffer_size;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (device_max_constant_buffer_size < 65536)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        // device_local_mem_size
+
+        cl_ulong device_local_mem_size;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        if (device_local_mem_size < 32768)
+        {
+          event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
+
+          device_param->skipped = true;
+        }
+
+        device_param->device_local_mem_size = device_local_mem_size;
+
+        // device_local_mem_type
+
+        cl_device_local_mem_type device_local_mem_type;
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->device_local_mem_type = device_local_mem_type;
+
+        // If there's both an Intel CPU and an AMD OpenCL runtime it's a tricky situation
+        // Both platforms support CPU device types and therefore both will try to use 100% of the physical resources
+        // This results in both utilizing it for 50%
+        // However, Intel has much better SIMD control over their own hardware
+        // It makes sense to give them full control over their own hardware
+
+        if (device_type & CL_DEVICE_TYPE_CPU)
+        {
+          if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
+          {
+            if (user_options->force == false)
+            {
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", device_id + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
+
+              device_param->skipped = true;
+            }
           }
         }
-      }
 
-      // Since some times we get reports from users about not working hashcat, dropping error messages like:
-      // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES
-      // Turns out that this is caused by Intel OpenCL runtime handling their GPU devices
-      // Disable such devices unless the user forces to use it
+        // We sometimes get reports from users about hashcat not working, with error messages like:
+        // CL_INVALID_COMMAND_QUEUE and CL_OUT_OF_RESOURCES
+        // It turns out that this is caused by the Intel OpenCL runtime handling their GPU devices.
+        // Disable such devices unless the user forces their use.
 
-      #if !defined (__APPLE__)
-      if (device_type & CL_DEVICE_TYPE_GPU)
-      {
-        if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->device_vendor_id == VENDOR_ID_INTEL_BEIGNET))
+        #if !defined (__APPLE__)
+        if (device_type & CL_DEVICE_TYPE_GPU)
         {
-          if (user_options->force == false)
+          if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->device_vendor_id == VENDOR_ID_INTEL_BEIGNET))
           {
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1);
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             We are waiting for updated OpenCL drivers from Intel.");
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
+            if (user_options->force == false)
+            {
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             We are waiting for updated OpenCL drivers from Intel.");
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
-            device_param->skipped = true;
+              device_param->skipped = true;
+            }
           }
         }
-      }
-      #endif // __APPLE__
+        #endif // __APPLE__
 
-      // skipped
+        // skipped
 
-      if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
-      {
-        device_param->skipped = true;
-      }
-
-      if ((backend_ctx->device_types_filter & (device_type)) == 0)
-      {
-        device_param->skipped = true;
-      }
-
-      // driver_version
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *driver_version = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, param_value_size, driver_version, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      device_param->driver_version = driver_version;
-
-      // vendor specific
-
-      if (device_param->device_type & CL_DEVICE_TYPE_GPU)
-      {
-        if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
+        if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
         {
-          need_adl = true;
-
-          #if defined (__linux__)
-          need_sysfs = true;
-          #endif
+          device_param->skipped = true;
         }
 
-        if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+        if ((backend_ctx->device_types_filter & (device_type)) == 0)
         {
-          need_nvml = true;
-
-          #if defined (_WIN) || defined (__CYGWIN__)
-          need_nvapi = true;
-          #endif
-        }
-      }
-
-      if (device_param->device_type & CL_DEVICE_TYPE_GPU)
-      {
-        if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
-        {
-          cl_device_topology_amd amdtopo;
-
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          device_param->pcie_bus      = amdtopo.pcie.bus;
-          device_param->pcie_device   = amdtopo.pcie.device;
-          device_param->pcie_function = amdtopo.pcie.function;
+          device_param->skipped = true;
         }
 
-        if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+        // driver_version
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *driver_version = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, param_value_size, driver_version, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->driver_version = driver_version;
+
+        // vendor specific
+
+        if (device_param->device_type & CL_DEVICE_TYPE_GPU)
         {
-          cl_uint pci_bus_id_nv;  // is cl_uint the right type for them??
-          cl_uint pci_slot_id_nv;
-
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          device_param->pcie_bus      = (u8) (pci_bus_id_nv);
-          device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
-          device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
-
-          int sm_minor = 0;
-          int sm_major = 0;
-
-          //if (backend_ctx->cuda)
-          if (0)
+          if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
           {
-            CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device_param->device_cuda);
+            need_adl = true;
 
-            if (CU_rc == -1) return -1;
-
-            CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device_param->device_cuda);
-
-            if (CU_rc == -1) return -1;
+            #if defined (__linux__)
+            need_sysfs = true;
+            #endif
           }
-          else
+
+          if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
           {
+            need_nvml = true;
+
+            #if defined (_WIN) || defined (__CYGWIN__)
+            need_nvapi = true;
+            #endif
+          }
+        }
+
+        if (device_param->device_type & CL_DEVICE_TYPE_GPU)
+        {
+          if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
+          {
+            cl_device_topology_amd amdtopo;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->pcie_bus      = amdtopo.pcie.bus;
+            device_param->pcie_device   = amdtopo.pcie.device;
+            device_param->pcie_function = amdtopo.pcie.function;
+          }
+
+          if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+          {
+            cl_uint pci_bus_id_nv;  // is cl_uint the right type for them??
+            cl_uint pci_slot_id_nv;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->pcie_bus      = (u8) (pci_bus_id_nv);
+            device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
+            device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
+
+            int sm_minor = 0;
+            int sm_major = 0;
+
             CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL);
 
             if (CL_rc == -1) return -1;
@@ -4207,357 +4414,357 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL);
 
             if (CL_rc == -1) return -1;
+
+            device_param->sm_minor = sm_minor;
+            device_param->sm_major = sm_major;
+
+            cl_uint kernel_exec_timeout = 0;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->kernel_exec_timeout = kernel_exec_timeout;
+
+            // CPU burning loop damper
+            // Value is given as number between 0-100
+            // By default 8%
+
+            device_param->spin_damp = (double) user_options->spin_damp / 100;
           }
-
-          device_param->sm_minor = sm_minor;
-          device_param->sm_major = sm_major;
-
-          cl_uint kernel_exec_timeout = 0;
-
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          device_param->kernel_exec_timeout = kernel_exec_timeout;
-
-          // CPU burning loop damper
-          // Value is given as number between 0-100
-          // By default 8%
-
-          device_param->spin_damp = (double) user_options->spin_damp / 100;
         }
-      }
 
-      // common driver check
+        // common driver check
 
-      if (device_param->skipped == false)
-      {
-        if ((user_options->force == false) && (user_options->opencl_info == false))
+        if (device_param->skipped == false)
         {
-          if (device_type & CL_DEVICE_TYPE_CPU)
+          if ((user_options->force == false) && (user_options->opencl_info == false))
           {
-            if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
+            if (device_type & CL_DEVICE_TYPE_CPU)
             {
-              bool intel_warn = false;
-
-              // Intel OpenCL runtime 18
-
-              int opencl_driver1 = 0;
-              int opencl_driver2 = 0;
-              int opencl_driver3 = 0;
-              int opencl_driver4 = 0;
-
-              const int res18 = sscanf (device_param->driver_version, "%u.%u.%u.%u", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4);
-
-              if (res18 == 4)
+              if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
               {
-                // so far all versions 18 are ok
-              }
-              else
-              {
-                // Intel OpenCL runtime 16
+                bool intel_warn = false;
 
-                float opencl_version = 0;
-                int   opencl_build   = 0;
+                // Intel OpenCL runtime 18
 
-                const int res16 = sscanf (device_param->device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build);
+                int opencl_driver1 = 0;
+                int opencl_driver2 = 0;
+                int opencl_driver3 = 0;
+                int opencl_driver4 = 0;
 
-                if (res16 == 2)
+                const int res18 = sscanf (device_param->driver_version, "%u.%u.%u.%u", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4);
+
+                if (res18 == 4)
                 {
-                  if (opencl_build < 25) intel_warn = true;
+                  // so far all versions 18 are ok
+                }
+                else
+                {
+                  // Intel OpenCL runtime 16
+
+                  float opencl_version = 0;
+                  int   opencl_build   = 0;
+
+                  const int res16 = sscanf (device_param->device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build);
+
+                  if (res16 == 2)
+                  {
+                    if (opencl_build < 25) intel_warn = true;
+                  }
+                }
+
+                if (intel_warn == true)
+                {
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->driver_version);
+
+                  event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
+                  event_log_warning (hashcat_ctx, "See hashcat.net for officially supported NVIDIA drivers.");
+                  event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+                  event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
+                  event_log_warning (hashcat_ctx, NULL);
+
+                  return -1;
+                }
+              }
+            }
+            else if (device_type & CL_DEVICE_TYPE_GPU)
+            {
+              if (device_param->platform_vendor_id == VENDOR_ID_AMD)
+              {
+                bool amd_warn = true;
+
+                #if defined (__linux__)
+                // AMDGPU-PRO Driver 16.40 and higher
+                if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false;
+                // AMDGPU-PRO Driver 16.50 is known to be broken
+                if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true;
+                // AMDGPU-PRO Driver 16.60 is known to be broken
+                if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true;
+                // AMDGPU-PRO Driver 17.10 is known to be broken
+                if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true;
+                // AMDGPU-PRO Driver 17.20 (2416) is fine; it needs no separate check because it already matches >= 2117
+                #elif defined (_WIN)
+                // AMD Radeon Software 14.9 and higher, should be updated to 15.12
+                if (strtoul (device_param->driver_version, NULL, 10) >= 1573) amd_warn = false;
+                #else
+                // we have no information about other os
+                if (amd_warn == true) amd_warn = false;
+                #endif
+
+                if (amd_warn == true)
+                {
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->driver_version);
+
+                  event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported AMD driver.");
+                  event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers.");
+                  event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+                  event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
+                  event_log_warning (hashcat_ctx, NULL);
+
+                  return -1;
                 }
               }
 
-              if (intel_warn == true)
+              if (device_param->platform_vendor_id == VENDOR_ID_NV)
               {
-                event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->driver_version);
+                int nv_warn = true;
 
-                event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
-                event_log_warning (hashcat_ctx, "See hashcat.net for officially supported NVIDIA drivers.");
-                event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
-                event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
-                event_log_warning (hashcat_ctx, NULL);
+                int version_maj = 0;
+                int version_min = 0;
 
-                return -1;
-              }
-            }
-          }
-          else if (device_type & CL_DEVICE_TYPE_GPU)
-          {
-            if (device_param->platform_vendor_id == VENDOR_ID_AMD)
-            {
-              bool amd_warn = true;
+                const int r = sscanf (device_param->driver_version, "%d.%d", &version_maj, &version_min);
 
-              #if defined (__linux__)
-              // AMDGPU-PRO Driver 16.40 and higher
-              if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false;
-              // AMDGPU-PRO Driver 16.50 is known to be broken
-              if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true;
-              // AMDGPU-PRO Driver 16.60 is known to be broken
-              if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true;
-              // AMDGPU-PRO Driver 17.10 is known to be broken
-              if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true;
-              // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117
-              #elif defined (_WIN)
-              // AMD Radeon Software 14.9 and higher, should be updated to 15.12
-              if (strtoul (device_param->driver_version, NULL, 10) >= 1573) amd_warn = false;
-              #else
-              // we have no information about other os
-              if (amd_warn == true) amd_warn = false;
-              #endif
-
-              if (amd_warn == true)
-              {
-                event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->driver_version);
-
-                event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported AMD driver.");
-                event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers.");
-                event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
-                event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
-                event_log_warning (hashcat_ctx, NULL);
-
-                return -1;
-              }
-            }
-
-            if (device_param->platform_vendor_id == VENDOR_ID_NV)
-            {
-              int nv_warn = true;
-
-              int version_maj = 0;
-              int version_min = 0;
-
-              const int r = sscanf (device_param->driver_version, "%d.%d", &version_maj, &version_min);
-
-              if (r == 2)
-              {
-                if (version_maj >= 367)
+                if (r == 2)
                 {
-                  if (version_maj == 418)
+                  if (version_maj >= 367)
                   {
-                    // older 418.x versions are known to be broken.
-                    // for instance, NVIDIA-Linux-x86_64-418.43.run
-                    // run ./hashcat -b -m 2501 results in self-test fail
+                    if (version_maj == 418)
+                    {
+                      // older 418.x versions are known to be broken.
+                      // for instance, NVIDIA-Linux-x86_64-418.43.run
+                      // run ./hashcat -b -m 2501 results in self-test fail
 
-                    if (version_min >= 56)
+                      if (version_min >= 56)
+                      {
+                        nv_warn = false;
+                      }
+                    }
+                    else
                     {
                       nv_warn = false;
                     }
                   }
-                  else
-                  {
-                    nv_warn = false;
-                  }
+                }
+                else
+                {
+                  // unknown version scheme, probably new driver version
+
+                  nv_warn = false;
+                }
+
+                if (nv_warn == true)
+                {
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->driver_version);
+
+                  event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
+                  event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers.");
+                  event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+                  event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
+                  event_log_warning (hashcat_ctx, NULL);
+
+                  return -1;
+                }
+
+                if (device_param->sm_major < 5)
+                {
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
+                }
+
+                if (device_param->kernel_exec_timeout != 0)
+                {
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
                 }
               }
-              else
+
+              if ((strstr (device_param->device_opencl_version, "beignet")) || (strstr (device_param->device_version, "beignet")))
               {
-                // unknown version scheme, probably new driver version
+                event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", device_id + 1);
 
-                nv_warn = false;
-              }
-
-              if (nv_warn == true)
-              {
-                event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->driver_version);
-
-                event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
-                event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers.");
-                event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+                event_log_warning (hashcat_ctx, "The beignet driver has been marked as likely to fail kernel compilation.");
                 event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
                 event_log_warning (hashcat_ctx, NULL);
 
                 return -1;
               }
-
-              if (device_param->sm_major < 5)
-              {
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
-              }
-
-              if (device_param->kernel_exec_timeout != 0)
-              {
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
-                if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
-              }
-            }
-
-            if ((strstr (device_param->device_opencl_version, "beignet")) || (strstr (device_param->device_version, "beignet")))
-            {
-              event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", device_id + 1);
-
-              event_log_warning (hashcat_ctx, "The beignet driver has been marked as likely to fail kernel compilation.");
-              event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
-              event_log_warning (hashcat_ctx, NULL);
-
-              return -1;
             }
           }
+
+          /**
+           * activate device
+           */
+
+          devices_active++;
         }
 
         /**
-         * activate device
+         * create context for each device
          */
 
-        devices_active++;
-      }
+        cl_context context;
 
-      /**
-       * create context for each device
-       */
+        cl_context_properties properties[3];
 
-      cl_context context;
+        properties[0] = CL_CONTEXT_PLATFORM;
+        properties[1] = (cl_context_properties) device_param->platform;
+        properties[2] = 0;
 
-      cl_context_properties properties[3];
+        CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->device, NULL, NULL, &context);
 
-      properties[0] = CL_CONTEXT_PLATFORM;
-      properties[1] = (cl_context_properties) device_param->platform;
-      properties[2] = 0;
+        if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->device, NULL, NULL, &context);
+        /**
+         * create command-queue
+         */
 
-      if (CL_rc == -1) return -1;
+        cl_command_queue command_queue;
 
-      /**
-       * create command-queue
-       */
+        CL_rc = hc_clCreateCommandQueue (hashcat_ctx, context, device_param->device, 0, &command_queue);
 
-      cl_command_queue command_queue;
+        if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, context, device_param->device, 0, &command_queue);
-
-      if (CL_rc == -1) return -1;
-
-      if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
-      {
-        const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
-
-        device_param->has_vadd3 = has_vadd3;
-
-        const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
-
-        device_param->has_vbfe = has_vbfe;
-
-        const bool has_vperm = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
-
-        device_param->has_vperm = has_vperm;
-      }
-
-      if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
-      {
-        const bool has_bfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
-
-        device_param->has_bfe = has_bfe;
-
-        const bool has_lop3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
-
-        device_param->has_lop3 = has_lop3;
-
-        const bool has_mov64 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
-
-        device_param->has_mov64 = has_mov64;
-
-        const bool has_prmt = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
-
-        device_param->has_prmt = has_prmt;
-      }
-
-      // device_available_mem
-
-      #define MAX_ALLOC_CHECKS_CNT  8192
-      #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
-
-      device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
-
-      #if defined (_WIN)
-      if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
-      #else
-      if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
-      #endif
-      {
-        // OK, so the problem here is the following:
-        // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
-        // but there's no way to ask for available memory on the device.
-        // In combination, most OpenCL runtimes implementation of clCreateBuffer()
-        // are doing so called lazy memory allocation on the device.
-        // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
-        // running on the host we end up with an error type of this:
-        // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
-        // The clEnqueueNDRangeKernel() is because of the lazy allocation
-        // The best way to workaround this problem is if we would be able to ask for available memory,
-        // The idea here is to try to evaluate available memory by allocating it till it errors
-
-        cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
-
-        u64 c;
-
-        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
         {
-          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+          const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
-          cl_int CL_err;
+          device_param->has_vadd3 = has_vadd3;
 
-          OCL_PTR *ocl = backend_ctx->ocl;
+          const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
-          tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
+          device_param->has_vbfe = has_vbfe;
 
-          if (CL_err != CL_SUCCESS)
-          {
-            c--;
+          const bool has_vperm = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
-            break;
-          }
-
-          // transfer only a few byte should be enough to force the runtime to actually allocate the memory
-
-          u8 tmp_host[8];
-
-          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
+          device_param->has_vperm = has_vperm;
         }
 
-        device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
-
-        // clean up
-
-        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
         {
-          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+          const bool has_bfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
-          if (tmp_device[c] != NULL)
-          {
-            CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]);
+          device_param->has_bfe = has_bfe;
 
-            if (CL_rc == -1) return -1;
-          }
+          const bool has_lop3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+          device_param->has_lop3 = has_lop3;
+
+          const bool has_mov64 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
+
+          device_param->has_mov64 = has_mov64;
+
+          const bool has_prmt = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+          device_param->has_prmt = has_prmt;
         }
 
-        hcfree (tmp_device);
+        // device_available_mem
+
+        #define MAX_ALLOC_CHECKS_CNT  8192
+        #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+
+        device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+
+        #if defined (_WIN)
+        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
+        #else
+        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
+        #endif
+        {
+          // OK, so the problem here is the following:
+          // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
+          // but there's no way to ask for available memory on the device.
+          // In combination, most OpenCL runtimes implementation of clCreateBuffer()
+          // are doing so called lazy memory allocation on the device.
+          // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
+          // running on the host we end up with an error type of this:
+          // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
+          // The clEnqueueNDRangeKernel() is because of the lazy allocation
+          // The best way to workaround this problem is if we would be able to ask for available memory,
+          // The idea here is to try to evaluate available memory by allocating it till it errors
+
+          cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
+
+          u64 c;
+
+          for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+          {
+            if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+            cl_int CL_err;
+
+            OCL_PTR *ocl = backend_ctx->ocl;
+
+            tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
+
+            if (CL_err != CL_SUCCESS)
+            {
+              c--;
+
+              break;
+            }
+
+            // transfer only a few byte should be enough to force the runtime to actually allocate the memory
+
+            u8 tmp_host[8];
+
+            CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+            if (CL_err != CL_SUCCESS) break;
+
+            CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+            if (CL_err != CL_SUCCESS) break;
+
+            CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+            if (CL_err != CL_SUCCESS) break;
+
+            CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+            if (CL_err != CL_SUCCESS) break;
+          }
+
+          device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
+
+          // clean up
+
+          for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+          {
+            if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+            if (tmp_device[c] != NULL)
+            {
+              CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]);
+
+              if (CL_rc == -1) return -1;
+            }
+          }
+
+          hcfree (tmp_device);
+        }
+
+        hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
+
+        hc_clReleaseContext (hashcat_ctx, context);
+
+        // next please
+
+        devices_cnt++;
       }
-
-      hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
-
-      hc_clReleaseContext (hashcat_ctx, context);
-
-      // next please
-
-      devices_cnt++;
     }
   }
 

From d73c0ac8a93b26ef8751372655bc0452f5d02a28 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 28 Apr 2019 18:54:26 +0200
Subject: [PATCH 13/73] More CUDA attribute queries

---
 include/backend.h |   2 +
 include/types.h   |   2 +
 src/backend.c     | 178 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index d879386aa..9323d3880 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -44,6 +44,8 @@ int hc_cuDeviceGetAttribute      (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_
 int hc_cuDeviceGetCount          (hashcat_ctx_t *hashcat_ctx, int *count);
 int hc_cuDeviceGet               (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
 int hc_cuDeviceGetName           (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
+int hc_cuDeviceTotalMem          (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
+int hc_cuDriverGetVersion        (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/types.h b/include/types.h
index 954b1ee2a..2eeb3b099 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1352,6 +1352,8 @@ typedef struct backend_ctx
   int                 opencl_devices_cnt;
   int                 opencl_devices_active;
 
+  int                 cuda_driver_version;
+
   cl_uint             platforms_cnt;
   cl_platform_id     *platforms;
   char              **platforms_vendor;
diff --git a/src/backend.c b/src/backend.c
index 476d47c22..e79f0535b 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -986,6 +986,60 @@ int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevic
   return 0;
 }
 
+int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev)
+{
+  // Forwards bytes and dev to cuDeviceTotalMem(); returns 0 on CUDA_SUCCESS, else logs the error and returns -1.
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceTotalMem (bytes, dev);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *error_text = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &error_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %d", CU_err);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %s", error_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
+{
+  // Forwards driverVersion to cuDriverGetVersion(); returns 0 on CUDA_SUCCESS, else logs the error and returns -1.
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDriverGetVersion (driverVersion);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *error_text = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &error_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDriverGetVersion(): %d", CU_err);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDriverGetVersion(): %s", error_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 
 // OpenCL
 
@@ -3719,6 +3773,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   if (backend_ctx->enabled == false) return 0;
 
+  hc_device_param_t *devices_param = backend_ctx->devices_param;
+
   bool need_adl     = false;
   bool need_nvml    = false;
   bool need_nvapi   = false;
@@ -3730,6 +3786,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   if (backend_ctx->cuda)
   {
+    // cuda version
+
+    int cuda_driver_version = 0;
+
+    const int rc_cuDriverGetVersion = hc_cuDriverGetVersion (hashcat_ctx, &cuda_driver_version);
+
+    if (rc_cuDriverGetVersion == -1) return -1;
+
+    backend_ctx->cuda_driver_version = cuda_driver_version;
+
+    // device count
+
     int cuda_devices_cnt = 0;
 
     const int rc_cuDeviceGetCount = hc_cuDeviceGetCount (hashcat_ctx, &cuda_devices_cnt);
@@ -3743,7 +3811,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     backend_devices_cnt += cuda_devices_cnt;
 
-    hc_device_param_t *devices_param = backend_ctx->devices_param;
+    // device specific
 
     for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++, backend_devices_idx++)
     {
@@ -3776,6 +3844,28 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       hc_string_trim_trailing (device_name);
 
+      // max_compute_units
+
+      int device_processors;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_processors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_processors = device_processors;
+
+      // device_global_mem
+
+      size_t bytes;
+
+      CU_rc = hc_cuDeviceTotalMem (hashcat_ctx, &bytes, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_global_mem = (u64) bytes;
+
+      device_param->device_available_mem = 0;
+
       // sm_minor, sm_major
 
       int sm_major = 0;
@@ -3792,8 +3882,92 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->sm_major = sm_major;
       device_param->sm_minor = sm_minor;
 
+      // device_maxworkgroup_size
+
+      int device_maxworkgroup_size;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_maxworkgroup_size = device_maxworkgroup_size;
+
+      // max_clock_frequency
+
+      int device_maxclock_frequency;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->device_maxclock_frequency = device_maxclock_frequency / 1000;
+
+      // pcie_bus, pcie_device, pcie_function
+
+      int pci_bus_id_nv;  // is cl_uint the right type for them??
+      int pci_slot_id_nv;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->pcie_bus      = (u8) (pci_bus_id_nv);
+      device_param->pcie_device   = (u8) (pci_slot_id_nv >> 3);
+      device_param->pcie_function = (u8) (pci_slot_id_nv & 7);
+
+      // kernel_exec_timeout
+
+      int kernel_exec_timeout;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->kernel_exec_timeout = kernel_exec_timeout;
+
+      // max_shared_memory_per_block
+
+      int max_shared_memory_per_block;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      if (max_shared_memory_per_block < 32768)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", backend_devices_idx + 1);
+
+        device_param->skipped = true;
+      }
+
+      // device_max_constant_buffer_size
+
+      int device_max_constant_buffer_size;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device_cuda);
+
+      if (CU_rc == -1) return -1;
+
+      if (device_max_constant_buffer_size < 65536)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", backend_devices_idx + 1);
+
+        device_param->skipped = true;
+      }
+
+      // device_local_mem_type
+
+      cl_device_local_mem_type device_local_mem_type = CL_LOCAL;
+
+      device_param->device_local_mem_type = device_local_mem_type;
+
+      //
+
 
-    printf ("%s %d %d\n", device_name, sm_major, sm_minor);
 
     }
   }

From d862458ab506640aeccf08222de4b8a50a881651 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 29 Apr 2019 10:21:59 +0200
Subject: [PATCH 14/73] Begin renaming API specific variables in backend
 section

---
 include/types.h            | 151 ++++++++++++++++++++-----------------
 src/backend.c              | 133 ++++++++++++++++----------------
 src/hwmon.c                |  30 ++++----
 src/modules/module_03200.c |   2 +-
 src/modules/module_07900.c |   2 +-
 src/modules/module_09000.c |   2 +-
 src/modules/module_18600.c |   2 +-
 src/monitor.c              |   2 +-
 src/terminal.c             |   4 +-
 9 files changed, 171 insertions(+), 157 deletions(-)

diff --git a/include/types.h b/include/types.h
index 2eeb3b099..386dfba55 100644
--- a/include/types.h
+++ b/include/types.h
@@ -76,13 +76,13 @@ typedef struct timespec   hc_timer_t;
 #endif
 
 #if defined (_WIN)
-typedef HANDLE              hc_thread_t;
-typedef HANDLE              hc_thread_mutex_t;
-typedef HANDLE              hc_thread_semaphore_t;
+typedef HANDLE          hc_thread_t;
+typedef HANDLE          hc_thread_mutex_t;
+typedef HANDLE          hc_thread_semaphore_t;
 #else
-typedef pthread_t           hc_thread_t;
-typedef pthread_mutex_t     hc_thread_mutex_t;
-typedef sem_t               hc_thread_semaphore_t;
+typedef pthread_t       hc_thread_t;
+typedef pthread_mutex_t hc_thread_mutex_t;
+typedef sem_t           hc_thread_semaphore_t;
 #endif
 
 // enums
@@ -995,27 +995,17 @@ typedef struct link_speed
 
 typedef struct hc_device_param
 {
-  CUdevice        device_cuda;
-
-  cl_device_id    device;
-  cl_device_type  device_type;
-
   u32     device_id;
-  u32     platform_devices_id;  // for mapping with hms devices
-
-  bool    skipped;              // permanent
-  bool    skipped_warning;      // iteration
-
-  st_status_t st_status;
-
-  int     sm_major;
-  int     sm_minor;
-  u32     kernel_exec_timeout;
 
   u8      pcie_bus;
   u8      pcie_device;
   u8      pcie_function;
 
+  u32     platform_devices_id;  // for mapping with hms devices
+
+  bool    skipped;              // permanent
+  bool    skipped_warning;      // iteration
+
   u32     device_processors;
   u64     device_maxmem_alloc;
   u64     device_global_mem;
@@ -1023,7 +1013,13 @@ typedef struct hc_device_param
   u32     device_maxclock_frequency;
   size_t  device_maxworkgroup_size;
   u64     device_local_mem_size;
-  cl_device_local_mem_type device_local_mem_type;
+  int     device_local_mem_type;
+
+  int     sm_major;
+  int     sm_minor;
+  u32     kernel_exec_timeout;
+
+  st_status_t st_status;
 
   u32     vector_width;
 
@@ -1223,6 +1219,50 @@ typedef struct hc_device_param
 
   double  spin_damp;
 
+
+  void   *kernel_params[PARAMCNT];
+  void   *kernel_params_mp[PARAMCNT];
+  void   *kernel_params_mp_r[PARAMCNT];
+  void   *kernel_params_mp_l[PARAMCNT];
+  void   *kernel_params_amp[PARAMCNT];
+  void   *kernel_params_tm[PARAMCNT];
+  void   *kernel_params_memset[PARAMCNT];
+  void   *kernel_params_atinit[PARAMCNT];
+  void   *kernel_params_decompress[PARAMCNT];
+
+  u32     kernel_params_buf32[PARAMCNT];
+  u64     kernel_params_buf64[PARAMCNT];
+
+  u32     kernel_params_mp_buf32[PARAMCNT];
+  u64     kernel_params_mp_buf64[PARAMCNT];
+
+  u32     kernel_params_mp_r_buf32[PARAMCNT];
+  u64     kernel_params_mp_r_buf64[PARAMCNT];
+
+  u32     kernel_params_mp_l_buf32[PARAMCNT];
+  u64     kernel_params_mp_l_buf64[PARAMCNT];
+
+  u32     kernel_params_amp_buf32[PARAMCNT];
+  u64     kernel_params_amp_buf64[PARAMCNT];
+
+  u32     kernel_params_memset_buf32[PARAMCNT];
+  u64     kernel_params_memset_buf64[PARAMCNT];
+
+  u32     kernel_params_atinit_buf32[PARAMCNT];
+  u64     kernel_params_atinit_buf64[PARAMCNT];
+
+  u32     kernel_params_decompress_buf32[PARAMCNT];
+  u64     kernel_params_decompress_buf64[PARAMCNT];
+
+  // API: cuda
+
+  CUdevice        cuda_device;
+
+  // API: opencl
+
+  cl_device_id    device;
+  cl_device_type  opencl_device_type;
+
   cl_platform_id platform;
 
   cl_uint  device_vendor_id;
@@ -1296,40 +1336,6 @@ typedef struct hc_device_param
   cl_mem  d_st_salts_buf;
   cl_mem  d_st_esalts_buf;
 
-  void   *kernel_params[PARAMCNT];
-  void   *kernel_params_mp[PARAMCNT];
-  void   *kernel_params_mp_r[PARAMCNT];
-  void   *kernel_params_mp_l[PARAMCNT];
-  void   *kernel_params_amp[PARAMCNT];
-  void   *kernel_params_tm[PARAMCNT];
-  void   *kernel_params_memset[PARAMCNT];
-  void   *kernel_params_atinit[PARAMCNT];
-  void   *kernel_params_decompress[PARAMCNT];
-
-  u32     kernel_params_buf32[PARAMCNT];
-  u64     kernel_params_buf64[PARAMCNT];
-
-  u32     kernel_params_mp_buf32[PARAMCNT];
-  u64     kernel_params_mp_buf64[PARAMCNT];
-
-  u32     kernel_params_mp_r_buf32[PARAMCNT];
-  u64     kernel_params_mp_r_buf64[PARAMCNT];
-
-  u32     kernel_params_mp_l_buf32[PARAMCNT];
-  u64     kernel_params_mp_l_buf64[PARAMCNT];
-
-  u32     kernel_params_amp_buf32[PARAMCNT];
-  u64     kernel_params_amp_buf64[PARAMCNT];
-
-  u32     kernel_params_memset_buf32[PARAMCNT];
-  u64     kernel_params_memset_buf64[PARAMCNT];
-
-  u32     kernel_params_atinit_buf32[PARAMCNT];
-  u64     kernel_params_atinit_buf64[PARAMCNT];
-
-  u32     kernel_params_decompress_buf32[PARAMCNT];
-  u64     kernel_params_decompress_buf64[PARAMCNT];
-
 } hc_device_param_t;
 
 typedef struct backend_ctx
@@ -1352,18 +1358,6 @@ typedef struct backend_ctx
   int                 opencl_devices_cnt;
   int                 opencl_devices_active;
 
-  int                 cuda_driver_version;
-
-  cl_uint             platforms_cnt;
-  cl_platform_id     *platforms;
-  char              **platforms_vendor;
-  char              **platforms_name;
-  char              **platforms_version;
-  bool               *platforms_skipped;
-
-  cl_uint             platform_devices_cnt;
-  cl_device_id       *platform_devices;
-
   u32                 devices_cnt;
   u32                 devices_active;
 
@@ -1374,9 +1368,7 @@ typedef struct backend_ctx
   u64                 kernel_power_all;
   u64                 kernel_power_final; // we save that so that all divisions are done from the same base
 
-  u64                 opencl_platforms_filter;
   u64                 devices_filter;
-  cl_device_type      device_types_filter;
 
   double              target_msec;
 
@@ -1389,6 +1381,25 @@ typedef struct backend_ctx
 
   int                 force_jit_compilation;
 
+  // cuda
+
+  int                 cuda_driver_version;
+
+  // opencl
+
+  cl_uint             platforms_cnt;
+  cl_platform_id     *platforms;
+  char              **platforms_vendor;
+  char              **platforms_name;
+  char              **platforms_version;
+  bool               *platforms_skipped;
+
+  cl_uint             platform_devices_cnt;
+  cl_device_id       *platform_devices;
+
+  u64                 opencl_platforms_filter;
+  cl_device_type      opencl_device_types_filter;
+
 } backend_ctx_t;
 
 typedef enum kernel_workload
diff --git a/src/backend.c b/src/backend.c
index e79f0535b..47b93f74c 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -191,9 +191,9 @@ static bool setup_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl
   return true;
 }
 
-static bool setup_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_device_types, cl_device_type *out)
+static bool setup_opencl_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_device_types, cl_device_type *out)
 {
-  cl_device_type device_types_filter = 0;
+  cl_device_type opencl_device_types_filter = 0;
 
   if (opencl_device_types)
   {
@@ -211,14 +211,14 @@ static bool setup_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *o
 
       if (device_type < 1 || device_type > 3)
       {
-        event_log_error (hashcat_ctx, "Invalid device_type %d specified.", device_type);
+        event_log_error (hashcat_ctx, "Invalid OpenCL device-type %d specified.", device_type);
 
         hcfree (device_types);
 
         return false;
       }
 
-      device_types_filter |= 1u << device_type;
+      opencl_device_types_filter |= 1u << device_type;
 
     } while ((next = strtok_r (NULL, ",", &saveptr)) != NULL);
 
@@ -229,10 +229,10 @@ static bool setup_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *o
     // Do not use CPU by default, this often reduces GPU performance because
     // the CPU is too busy to handle GPU synchronization
 
-    device_types_filter = CL_DEVICE_TYPE_ALL & ~CL_DEVICE_TYPE_CPU;
+    opencl_device_types_filter = CL_DEVICE_TYPE_ALL & ~CL_DEVICE_TYPE_CPU;
   }
 
-  *out = device_types_filter;
+  *out = opencl_device_types_filter;
 
   return true;
 }
@@ -330,7 +330,7 @@ static bool write_kernel_binary (hashcat_ctx_t *hashcat_ctx, char *kernel_file,
   return true;
 }
 
-static bool test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, const char *kernel_buf)
+static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, const char *kernel_buf)
 {
   int CL_rc;
 
@@ -1040,7 +1040,6 @@ int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
   return 0;
 }
 
-
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -1744,6 +1743,8 @@ int hc_clReleaseEvent (hashcat_ctx_t *hashcat_ctx, cl_event event)
   return 0;
 }
 
+// Backend
+
 int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw)
 {
   pw_idx_t pw_idx;
@@ -3555,13 +3556,13 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * OpenCL device type selection
    */
 
-  cl_device_type device_types_filter;
+  cl_device_type opencl_device_types_filter;
 
-  const bool rc_device_types_filter = setup_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &device_types_filter);
+  const bool rc_opencl_device_types_filter = setup_opencl_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &opencl_device_types_filter);
 
-  if (rc_device_types_filter == false) return -1;
+  if (rc_opencl_device_types_filter == false) return -1;
 
-  backend_ctx->device_types_filter = device_types_filter;
+  backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
 
   /**
    * Backend structures
@@ -3701,7 +3702,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if ((device_types_all & (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR)) == 0)
     {
-      device_types_filter |= CL_DEVICE_TYPE_CPU;
+      opencl_device_types_filter |= CL_DEVICE_TYPE_CPU;
     }
 
     // In another case, when the user uses --stdout, using CPU devices is much faster to setup
@@ -3711,11 +3712,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     {
       if (device_types_all & CL_DEVICE_TYPE_CPU)
       {
-        device_types_filter = CL_DEVICE_TYPE_CPU;
+        opencl_device_types_filter = CL_DEVICE_TYPE_CPU;
       }
     }
 
-    backend_ctx->device_types_filter = device_types_filter;
+    backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
   }
 
   backend_ctx->enabled = true;
@@ -3820,21 +3821,21 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       backend_ctx->backend_device_from_cuda[cuda_devices_idx]  = backend_devices_idx;
       backend_ctx->backend_device_to_cuda[backend_devices_idx] = cuda_devices_idx;
 
-      CUdevice device_cuda;
+      CUdevice cuda_device;
 
       int CU_rc;
 
-      CU_rc = hc_cuDeviceGet (hashcat_ctx, &device_cuda, cuda_devices_idx);
+      CU_rc = hc_cuDeviceGet (hashcat_ctx, &cuda_device, cuda_devices_idx);
 
       if (CU_rc == -1) return -1;
 
-      device_param->device_cuda = device_cuda;
+      device_param->cuda_device = cuda_device;
 
       // device_name
 
       char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY);
 
-      CU_rc = hc_cuDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, device_cuda);
+      CU_rc = hc_cuDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3844,26 +3845,28 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       hc_string_trim_trailing (device_name);
 
-      // max_compute_units
+      // device_processors
 
       int device_processors;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_processors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_processors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cuda_device);
 
       if (CU_rc == -1) return -1;
 
       device_param->device_processors = device_processors;
 
-      // device_global_mem
+      // device_global_mem, device_maxmem_alloc, device_available_mem
 
       size_t bytes;
 
-      CU_rc = hc_cuDeviceTotalMem (hashcat_ctx, &bytes, device_cuda);
+      CU_rc = hc_cuDeviceTotalMem (hashcat_ctx, &bytes, cuda_device);
 
       if (CU_rc == -1) return -1;
 
       device_param->device_global_mem = (u64) bytes;
 
+      device_param->device_maxmem_alloc = (u64) bytes;
+
       device_param->device_available_mem = 0;
 
       // sm_minor, sm_major
@@ -3871,11 +3874,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       int sm_major = 0;
       int sm_minor = 0;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuda_device);
 
       if (CU_rc == -1) return -1;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &sm_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3886,7 +3889,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_maxworkgroup_size;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3896,7 +3899,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_maxclock_frequency;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3907,11 +3910,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       int pci_bus_id_nv;  // is cl_uint the right type for them??
       int pci_slot_id_nv;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cuda_device);
 
       if (CU_rc == -1) return -1;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3923,7 +3926,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int kernel_exec_timeout;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3933,7 +3936,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int max_shared_memory_per_block;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -3948,7 +3951,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       int device_max_constant_buffer_size;
 
-      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device_cuda);
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, cuda_device);
 
       if (CU_rc == -1) return -1;
 
@@ -4138,17 +4141,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         device_param->platform = platform;
 
-        // device_type
+        // opencl_device_type
 
-        cl_device_type device_type;
+        cl_device_type opencl_device_type;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TYPE, sizeof (device_type), &device_type, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL);
 
         if (CL_rc == -1) return -1;
 
-        device_type &= ~CL_DEVICE_TYPE_DEFAULT;
+        opencl_device_type &= ~CL_DEVICE_TYPE_DEFAULT;
 
-        device_param->device_type = device_type;
+        device_param->opencl_device_type = opencl_device_type;
 
         // device_name
 
@@ -4461,7 +4464,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         // However, Intel has much better SIMD control over their own hardware
         // It makes sense to give them full control over their own hardware
 
-        if (device_type & CL_DEVICE_TYPE_CPU)
+        if (opencl_device_type & CL_DEVICE_TYPE_CPU)
         {
           if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
           {
@@ -4481,7 +4484,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         // Disable such devices unless the user forces to use it
 
         #if !defined (__APPLE__)
-        if (device_type & CL_DEVICE_TYPE_GPU)
+        if (opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
           if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->device_vendor_id == VENDOR_ID_INTEL_BEIGNET))
           {
@@ -4504,7 +4507,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           device_param->skipped = true;
         }
 
-        if ((backend_ctx->device_types_filter & (device_type)) == 0)
+        if ((backend_ctx->opencl_device_types_filter & (opencl_device_type)) == 0)
         {
           device_param->skipped = true;
         }
@@ -4525,7 +4528,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // vendor specific
 
-        if (device_param->device_type & CL_DEVICE_TYPE_GPU)
+        if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
           if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
           {
@@ -4546,7 +4549,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           }
         }
 
-        if (device_param->device_type & CL_DEVICE_TYPE_GPU)
+        if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
           if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
           {
@@ -4614,7 +4617,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         {
           if ((user_options->force == false) && (user_options->opencl_info == false))
           {
-            if (device_type & CL_DEVICE_TYPE_CPU)
+            if (opencl_device_type & CL_DEVICE_TYPE_CPU)
             {
               if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
               {
@@ -4662,7 +4665,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
                 }
               }
             }
-            else if (device_type & CL_DEVICE_TYPE_GPU)
+            else if (opencl_device_type & CL_DEVICE_TYPE_GPU)
             {
               if (device_param->platform_vendor_id == VENDOR_ID_AMD)
               {
@@ -4811,36 +4814,36 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (CL_rc == -1) return -1;
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
         {
-          const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vadd3 = has_vadd3;
 
-          const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vbfe = has_vbfe;
 
-          const bool has_vperm = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vperm = has_vperm;
         }
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
         {
-          const bool has_bfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_bfe = has_bfe;
 
-          const bool has_lop3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_lop3 = has_lop3;
 
-          const bool has_mov64 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
+          const bool has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
 
           device_param->has_mov64 = has_mov64;
 
-          const bool has_prmt = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_prmt = has_prmt;
         }
@@ -4853,9 +4856,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
 
         #if defined (_WIN)
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
         #else
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
         #endif
         {
           // OK, so the problem here is the following:
@@ -5246,7 +5249,7 @@ static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param
 
   // for CPU we just do 1 ...
 
-  if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
     if ((1 >= kernel_threads_min) && (1 <= kernel_threads_max))
     {
@@ -5420,11 +5423,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (user_options->slow_candidates == true)
       {
-        tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->device_type, 0, hashconfig->hash_mode);
+        tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode);
       }
       else
       {
-        tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->device_type, user_options->attack_mode, hashconfig->hash_mode);
+        tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode);
       }
 
       if (tuningdb_entry == NULL || tuningdb_entry->vector_width == -1)
@@ -5458,7 +5461,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
-      if (device_param->device_type & CL_DEVICE_TYPE_GPU)
+      if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
       {
         vector_width = 1;
       }
@@ -5483,11 +5486,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (user_options->slow_candidates == true)
     {
-      tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->device_type, 0, hashconfig->hash_mode);
+      tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, 0, hashconfig->hash_mode);
     }
     else
     {
-      tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->device_type, user_options->attack_mode, hashconfig->hash_mode);
+      tuningdb_entry = tuning_db_search (hashcat_ctx, device_param->device_name, device_param->opencl_device_type, user_options->attack_mode, hashconfig->hash_mode);
     }
 
     // user commandline option override tuning db
@@ -5738,15 +5741,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // we don't have sm_* on vendors not NV but it doesn't matter
 
     #if defined (DEBUG)
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #else
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #endif
 
     build_options_buf[build_options_len] = 0;
 
     /*
-    if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+    if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
     {
       if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
       {
@@ -5826,7 +5829,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->platform_vendor_id == VENDOR_ID_APPLE)
     {
-      if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+      if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
       {
         cache_disable = true;
       }
diff --git a/src/hwmon.c b/src/hwmon.c
index d38d4f052..4c8dca868 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -1351,7 +1351,7 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
   if (hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1411,7 +1411,7 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
 
   if (hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1459,7 +1459,7 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
   if (hwmon_ctx->hm_device[device_id].temperature_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1542,7 +1542,7 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
   if (hwmon_ctx->hm_device[device_id].fanpolicy_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1600,7 +1600,7 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
   if (hwmon_ctx->hm_device[device_id].fanspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1689,7 +1689,7 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
   if (hwmon_ctx->hm_device[device_id].buslanes_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1755,7 +1755,7 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
   if (hwmon_ctx->hm_device[device_id].utilization_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1807,7 +1807,7 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
   if (hwmon_ctx->hm_device[device_id].memoryspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1873,7 +1873,7 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
   if (hwmon_ctx->hm_device[device_id].corespeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -1939,7 +1939,7 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
   if (hwmon_ctx->hm_device[device_id].throttle_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
   if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
   {
@@ -2108,7 +2108,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         if (device_param->device_vendor_id != VENDOR_ID_NV) continue;
 
@@ -2158,7 +2158,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         if (device_param->device_vendor_id != VENDOR_ID_NV) continue;
 
@@ -2227,7 +2227,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         if (device_param->device_vendor_id != VENDOR_ID_AMD) continue;
 
@@ -2275,7 +2275,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
       {
         hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
-        if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         hm_adapters_sysfs[hm_adapters_id].sysfs = device_id;
 
@@ -2320,7 +2320,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->skipped == true) continue;
 
-    if ((device_param->device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
     const u32 platform_devices_id = device_param->platform_devices_id;
 
diff --git a/src/modules/module_03200.c b/src/modules/module_03200.c
index 73a8d3a76..8a9cb5b7b 100644
--- a/src/modules/module_03200.c
+++ b/src/modules/module_03200.c
@@ -88,7 +88,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
   u32 fixed_local_size = 0;
 
-  if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
     fixed_local_size = 1;
   }
diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c
index 9c2d635ce..6e1102e56 100644
--- a/src/modules/module_07900.c
+++ b/src/modules/module_07900.c
@@ -289,7 +289,7 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   if (device_param->platform_vendor_id == VENDOR_ID_APPLE)
   {
     // trap 6
-    if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->device_type & CL_DEVICE_TYPE_GPU))
+    if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
     {
       return true;
     }
diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c
index 7a459baaf..464f47d97 100644
--- a/src/modules/module_09000.c
+++ b/src/modules/module_09000.c
@@ -76,7 +76,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
   u32 fixed_local_size = 0;
 
-  if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
     fixed_local_size = 1;
   }
diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c
index 62ee9cdb9..c1d743e9b 100644
--- a/src/modules/module_18600.c
+++ b/src/modules/module_18600.c
@@ -68,7 +68,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
   u32 fixed_local_size = 0;
 
-  if (device_param->device_type & CL_DEVICE_TYPE_CPU)
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
     fixed_local_size = 1;
   }
diff --git a/src/monitor.c b/src/monitor.c
index 6c5d4577e..aec2220d3 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -120,7 +120,7 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((backend_ctx->devices_param[device_id].device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
         const int temperature = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
 
diff --git a/src/terminal.c b/src/terminal.c
index 52cf341e3..394b724f1 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -687,7 +687,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->platform != platform_id) continue;
 
-      cl_device_type device_type                = device_param->device_type;
+      cl_device_type opencl_device_type         = device_param->opencl_device_type;
       cl_uint        device_vendor_id           = device_param->device_vendor_id;
       char          *device_vendor              = device_param->device_vendor;
       char          *device_name                = device_param->device_name;
@@ -700,7 +700,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
       char          *driver_version             = device_param->driver_version;
 
       event_log_info (hashcat_ctx, "  Device ID #%u",         devices_idx + 1);
-      event_log_info (hashcat_ctx, "    Type           : %s", ((device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+      event_log_info (hashcat_ctx, "    Type           : %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
       event_log_info (hashcat_ctx, "    Vendor ID      : %u", device_vendor_id);
       event_log_info (hashcat_ctx, "    Vendor         : %s", device_vendor);
       event_log_info (hashcat_ctx, "    Name           : %s", device_name);

From e3500ff4aa608839d4667c5dca252b2edcb39111 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 30 Apr 2019 13:38:44 +0200
Subject: [PATCH 15/73] Add CUDA device attributes to -I

---
 include/hwmon.h            |   20 +-
 include/status.h           |   54 +-
 include/thread.h           |    4 +-
 include/types.h            |   60 +-
 src/Makefile               |    2 +-
 src/backend.c              | 1539 +++++++++++++++++++-----------------
 src/dispatch.c             |    8 +-
 src/hashcat.c              |   44 +-
 src/hwmon.c                |  436 +++++-----
 src/modules/module_01450.c |    4 +-
 src/modules/module_01720.c |    4 +-
 src/modules/module_01722.c |    4 +-
 src/modules/module_01740.c |    4 +-
 src/modules/module_01750.c |    4 +-
 src/modules/module_01760.c |    4 +-
 src/modules/module_03200.c |    2 +-
 src/modules/module_06400.c |    4 +-
 src/modules/module_06800.c |    4 +-
 src/modules/module_07500.c |    2 +-
 src/modules/module_07800.c |    2 +-
 src/modules/module_07801.c |    2 +-
 src/modules/module_07900.c |    4 +-
 src/modules/module_08000.c |    2 +-
 src/modules/module_08600.c |    2 +-
 src/modules/module_09000.c |    4 +-
 src/modules/module_09200.c |    2 +-
 src/modules/module_09800.c |    2 +-
 src/modules/module_10700.c |    8 +-
 src/modules/module_10800.c |    4 +-
 src/modules/module_10900.c |    4 +-
 src/modules/module_11000.c |    2 +-
 src/modules/module_11600.c |    4 +-
 src/modules/module_11700.c |    2 +-
 src/modules/module_11750.c |    2 +-
 src/modules/module_11760.c |    2 +-
 src/modules/module_11800.c |    2 +-
 src/modules/module_11850.c |    2 +-
 src/modules/module_11860.c |    2 +-
 src/modules/module_12100.c |    2 +-
 src/modules/module_12200.c |    4 +-
 src/modules/module_12500.c |    2 +-
 src/modules/module_12800.c |    4 +-
 src/modules/module_12900.c |    4 +-
 src/modules/module_13000.c |    4 +-
 src/modules/module_13100.c |    2 +-
 src/modules/module_14100.c |    2 +-
 src/modules/module_14400.c |    2 +-
 src/modules/module_15000.c |    4 +-
 src/modules/module_15300.c |    2 +-
 src/modules/module_15600.c |    4 +-
 src/modules/module_15700.c |    2 +-
 src/modules/module_15900.c |    2 +-
 src/modules/module_16200.c |    4 +-
 src/modules/module_16300.c |    4 +-
 src/modules/module_16700.c |    4 +-
 src/modules/module_16900.c |    4 +-
 src/modules/module_17300.c |    2 +-
 src/modules/module_17400.c |    2 +-
 src/modules/module_17500.c |    2 +-
 src/modules/module_17600.c |    2 +-
 src/modules/module_17700.c |    2 +-
 src/modules/module_17800.c |    2 +-
 src/modules/module_17900.c |    2 +-
 src/modules/module_18000.c |    2 +-
 src/modules/module_18100.c |    2 +-
 src/modules/module_18200.c |    2 +-
 src/modules/module_18300.c |    4 +-
 src/modules/module_18600.c |    4 +-
 src/modules/module_19100.c |    4 +-
 src/modules/module_19200.c |    4 +-
 src/monitor.c              |   30 +-
 src/status.c               |  136 ++--
 src/terminal.c             |  169 ++--
 73 files changed, 1404 insertions(+), 1280 deletions(-)

diff --git a/include/hwmon.h b/include/hwmon.h
index e3f64ec7d..7976cd9ac 100644
--- a/include/hwmon.h
+++ b/include/hwmon.h
@@ -11,16 +11,16 @@
 #ifndef _HWMON_H
 #define _HWMON_H
 
-int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_temperature_with_device_id        (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_fanpolicy_with_device_id          (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_fanspeed_with_device_id           (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_buslanes_with_device_id           (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_utilization_with_device_id        (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_memoryspeed_with_device_id        (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_corespeed_with_device_id          (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
-int hm_get_throttle_with_device_id           (hashcat_ctx_t *hashcat_ctx, const u32 device_id);
+int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_temperature_with_devices_idx        (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_fanpolicy_with_devices_idx          (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_fanspeed_with_devices_idx           (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_buslanes_with_devices_idx           (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_utilization_with_devices_idx        (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_memoryspeed_with_devices_idx        (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_corespeed_with_devices_idx          (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
+int hm_get_throttle_with_devices_idx           (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx);
 
 int  hwmon_ctx_init    (hashcat_ctx_t *hashcat_ctx);
 void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx);
diff --git a/include/status.h b/include/status.h
index 15efe5d01..2727d6172 100644
--- a/include/status.h
+++ b/include/status.h
@@ -19,8 +19,8 @@ void format_speed_display_1k (double val,    char *buf, size_t len);
 
 int         status_get_device_info_cnt                (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_device_info_active             (const hashcat_ctx_t *hashcat_ctx);
-bool        status_get_skipped_dev                    (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-bool        status_get_skipped_warning_dev            (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+bool        status_get_skipped_dev                    (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+bool        status_get_skipped_warning_dev            (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 char       *status_get_session                        (const hashcat_ctx_t *hashcat_ctx);
 const char *status_get_status_string                  (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_status_number                  (const hashcat_ctx_t *hashcat_ctx);
@@ -35,7 +35,7 @@ int         status_get_guess_mod_count                (const hashcat_ctx_t *hash
 double      status_get_guess_mod_percent              (const hashcat_ctx_t *hashcat_ctx);
 char       *status_get_guess_charset                  (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_guess_mask_length              (const hashcat_ctx_t *hashcat_ctx);
-char       *status_get_guess_candidates_dev           (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+char       *status_get_guess_candidates_dev           (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 char       *status_get_hash_name                      (const hashcat_ctx_t *hashcat_ctx);
 char       *status_get_hash_target                    (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_digests_done                   (const hashcat_ctx_t *hashcat_ctx);
@@ -68,12 +68,12 @@ u64         status_get_progress_skip                  (const hashcat_ctx_t *hash
 u64         status_get_progress_cur_relative_skip     (const hashcat_ctx_t *hashcat_ctx);
 u64         status_get_progress_end_relative_skip     (const hashcat_ctx_t *hashcat_ctx);
 double      status_get_hashes_msec_all                (const hashcat_ctx_t *hashcat_ctx);
-double      status_get_hashes_msec_dev                (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-double      status_get_hashes_msec_dev_benchmark      (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+double      status_get_hashes_msec_dev                (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+double      status_get_hashes_msec_dev_benchmark      (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 double      status_get_exec_msec_all                  (const hashcat_ctx_t *hashcat_ctx);
-double      status_get_exec_msec_dev                  (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+double      status_get_exec_msec_dev                  (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 char       *status_get_speed_sec_all                  (const hashcat_ctx_t *hashcat_ctx);
-char       *status_get_speed_sec_dev                  (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+char       *status_get_speed_sec_dev                  (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 int         status_get_cpt_cur_min                    (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_cpt_cur_hour                   (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_cpt_cur_day                    (const hashcat_ctx_t *hashcat_ctx);
@@ -81,30 +81,30 @@ int         status_get_cpt_avg_min                    (const hashcat_ctx_t *hash
 int         status_get_cpt_avg_hour                   (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_cpt_avg_day                    (const hashcat_ctx_t *hashcat_ctx);
 char       *status_get_cpt                            (const hashcat_ctx_t *hashcat_ctx);
-int         status_get_salt_pos_dev                   (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_innerloop_pos_dev              (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_innerloop_left_dev             (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_iteration_pos_dev              (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_iteration_left_dev             (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+int         status_get_salt_pos_dev                   (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_innerloop_pos_dev              (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_innerloop_left_dev             (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_iteration_pos_dev              (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_iteration_left_dev             (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 #ifdef WITH_BRAIN
 int         status_get_brain_session                  (const hashcat_ctx_t *hashcat_ctx);
 int         status_get_brain_attack                   (const hashcat_ctx_t *hashcat_ctx);
-int         status_get_brain_link_client_id_dev       (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_brain_link_status_dev          (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-char       *status_get_brain_link_recv_bytes_dev      (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-char       *status_get_brain_link_send_bytes_dev      (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-char       *status_get_brain_link_recv_bytes_sec_dev  (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-char       *status_get_brain_link_send_bytes_sec_dev  (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+int         status_get_brain_link_client_id_dev       (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_brain_link_status_dev          (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+char       *status_get_brain_link_recv_bytes_dev      (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+char       *status_get_brain_link_send_bytes_dev      (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+char       *status_get_brain_link_recv_bytes_sec_dev  (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+char       *status_get_brain_link_send_bytes_sec_dev  (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 #endif
-char       *status_get_hwmon_dev                      (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_corespeed_dev                  (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_memoryspeed_dev                (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-u64         status_get_progress_dev                   (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-double      status_get_runtime_msec_dev               (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_kernel_accel_dev               (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_kernel_loops_dev               (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_kernel_threads_dev             (const hashcat_ctx_t *hashcat_ctx, const int device_id);
-int         status_get_vector_width_dev               (const hashcat_ctx_t *hashcat_ctx, const int device_id);
+char       *status_get_hwmon_dev                      (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_corespeed_dev                  (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_memoryspeed_dev                (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+u64         status_get_progress_dev                   (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+double      status_get_runtime_msec_dev               (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_kernel_accel_dev               (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_kernel_loops_dev               (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_kernel_threads_dev             (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
+int         status_get_vector_width_dev               (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx);
 
 int         status_progress_init                      (hashcat_ctx_t *hashcat_ctx);
 void        status_progress_destroy                   (hashcat_ctx_t *hashcat_ctx);
diff --git a/include/thread.h b/include/thread.h
index d9e594a06..8f06d0e29 100644
--- a/include/thread.h
+++ b/include/thread.h
@@ -18,7 +18,7 @@
 #if defined (_WIN)
 
 #define hc_thread_create(t,f,a)     t = CreateThread (NULL, 0, (LPTHREAD_START_ROUTINE) &f, a, 0, NULL)
-#define hc_thread_wait(n,a)         for (u32 i = 0; i < n; i++) WaitForSingleObject ((a)[i], INFINITE)
+#define hc_thread_wait(n,a)         for (int i = 0; i < n; i++) WaitForSingleObject ((a)[i], INFINITE)
 #define hc_thread_exit(t)           ExitThread (t)
 #define hc_thread_detach(t)         CloseHandle (t)
 
@@ -42,7 +42,7 @@
 #else
 
 #define hc_thread_create(t,f,a)     pthread_create (&t, NULL, f, a)
-#define hc_thread_wait(n,a)         for (u32 i = 0; i < n; i++) pthread_join ((a)[i], NULL)
+#define hc_thread_wait(n,a)         for (int i = 0; i < n; i++) pthread_join ((a)[i], NULL)
 #define hc_thread_exit(t)           pthread_exit (&t)
 #define hc_thread_detach(t)         pthread_detach (t)
 
diff --git a/include/types.h b/include/types.h
index 386dfba55..ba8b0ea78 100644
--- a/include/types.h
+++ b/include/types.h
@@ -995,13 +995,13 @@ typedef struct link_speed
 
 typedef struct hc_device_param
 {
-  u32     device_id;
+  int     device_id;
 
   u8      pcie_bus;
   u8      pcie_device;
   u8      pcie_function;
 
-  u32     platform_devices_id;  // for mapping with hms devices
+  u32     opencl_platform_devices_id;  // for mapping with hms devices
 
   bool    skipped;              // permanent
   bool    skipped_warning;      // iteration
@@ -1014,6 +1014,7 @@ typedef struct hc_device_param
   size_t  device_maxworkgroup_size;
   u64     device_local_mem_size;
   int     device_local_mem_type;
+  char   *device_name;
 
   int     sm_major;
   int     sm_minor;
@@ -1198,14 +1199,6 @@ typedef struct hc_device_param
 
   hc_timer_t timer_speed;
 
-  // device specific attributes starting
-
-  char   *device_name;
-  char   *device_vendor;
-  char   *device_version;
-  char   *driver_version;
-  char   *device_opencl_version;
-
   // AMD
   bool    has_vadd3;
   bool    has_vbfe;
@@ -1256,17 +1249,25 @@ typedef struct hc_device_param
 
   // API: cuda
 
+  bool   is_cuda;
+
   CUdevice        cuda_device;
 
   // API: opencl
 
-  cl_device_id    device;
+  bool   is_opencl;
+
+  cl_device_id    opencl_device;
+
+  char   *opencl_driver_version;
+  char   *opencl_device_vendor;
+  char   *opencl_device_version;
+  char   *opencl_device_c_version;
+
+  cl_platform_id  opencl_platform;
   cl_device_type  opencl_device_type;
-
-  cl_platform_id platform;
-
-  cl_uint  device_vendor_id;
-  cl_uint  platform_vendor_id;
+  cl_uint         opencl_device_vendor_id;
+  cl_uint         opencl_platform_vendor_id;
 
   cl_kernel  kernel1;
   cl_kernel  kernel12;
@@ -1346,10 +1347,9 @@ typedef struct backend_ctx
   void               *cuda;
   void               *nvrtc;
 
-  int                *backend_device_from_cuda;   // from cuda device index to backend device index
-  int                *backend_device_to_cuda;     // from backend device index to cuda device index
-  int                *backend_device_from_opencl; // from opencl device index to backend device index
-  int                *backend_device_to_opencl;   // from backend device index to opencl device index
+  int                 backend_device_from_cuda[DEVICES_MAX];                              // from cuda device index to backend device index
+  int                 backend_device_from_opencl[DEVICES_MAX];                            // from opencl device index to backend device index
+  int                 backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform)
 
   int                 backend_devices_cnt;
   int                 backend_devices_active;
@@ -1358,9 +1358,6 @@ typedef struct backend_ctx
   int                 opencl_devices_cnt;
   int                 opencl_devices_active;
 
-  u32                 devices_cnt;
-  u32                 devices_active;
-
   hc_device_param_t  *devices_param;
 
   u32                 hardware_power_all;
@@ -1387,15 +1384,14 @@ typedef struct backend_ctx
 
   // opencl
 
-  cl_uint             platforms_cnt;
-  cl_platform_id     *platforms;
-  char              **platforms_vendor;
-  char              **platforms_name;
-  char              **platforms_version;
-  bool               *platforms_skipped;
-
-  cl_uint             platform_devices_cnt;
-  cl_device_id       *platform_devices;
+  cl_platform_id     *opencl_platforms;
+  cl_uint             opencl_platforms_cnt;
+  cl_device_id      **opencl_platforms_devices;
+  cl_uint            *opencl_platforms_devices_cnt;
+  char              **opencl_platforms_name;
+  bool               *opencl_platforms_skipped;
+  char              **opencl_platforms_vendor;
+  char              **opencl_platforms_version;
 
   u64                 opencl_platforms_filter;
   cl_device_type      opencl_device_types_filter;
diff --git a/src/Makefile b/src/Makefile
index 3f90fafe0..9ffea7d67 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,7 +4,7 @@
 ##
 
 SHARED                  := 0
-DEBUG                   := 0
+DEBUG                   := 1
 PRODUCTION              := 0
 PRODUCTION_VERSION      := v5.1.0
 ENABLE_BRAIN            := 1
diff --git a/src/backend.c b/src/backend.c
index 47b93f74c..1dddf5c53 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -105,6 +105,49 @@ static int ocl_check_dri (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
+// Parse the comma-separated device list in opencl_devices into a bitmask stored in *out (device N sets
+// bit N-1); a NULL list selects every device. Returns false if an entry is invalid, true otherwise.
+static bool setup_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_devices, u64 *out)
+{
+  u64 backend_devices_filter = 0;
+
+  if (opencl_devices)
+  {
+    char *devices = hcstrdup (opencl_devices);
+
+    if (devices == NULL) return false;
+
+    char *saveptr = NULL;
+
+    char *next = strtok_r (devices, ",", &saveptr);
+
+    do
+    {
+      // strtok_r () yields NULL for an empty or separator-only list: parse "" so it is rejected below
+      const int backend_device_id = (const int) strtol ((next == NULL) ? "" : next, NULL, 10);
+
+      if ((backend_device_id <= 0) || (backend_device_id >= 64))
+      {
+        event_log_error (hashcat_ctx, "Invalid device_id %d specified.", backend_device_id);
+        hcfree (devices);
+        return false;
+      }
+
+      backend_devices_filter |= 1ULL << (backend_device_id - 1);
+
+    } while ((next = strtok_r ((char *) NULL, ",", &saveptr)) != NULL);
+
+    hcfree (devices);
+  }
+  else
+  {
+    backend_devices_filter = -1ULL; // no list given: every bit set
+  }
+
+  *out = backend_devices_filter;
+  return true;
+}
+
 static bool setup_opencl_platforms_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_platforms, u64 *out)
 {
   u64 opencl_platforms_filter = 0;
@@ -148,49 +191,6 @@ static bool setup_opencl_platforms_filter (hashcat_ctx_t *hashcat_ctx, const cha
   return true;
 }
 
-static bool setup_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_devices, u64 *out)
-{
-  u64 devices_filter = 0;
-
-  if (opencl_devices)
-  {
-    char *devices = hcstrdup (opencl_devices);
-
-    if (devices == NULL) return false;
-
-    char *saveptr = NULL;
-
-    char *next = strtok_r (devices, ",", &saveptr);
-
-    do
-    {
-      const int device_id = (const int) strtol (next, NULL, 10);
-
-      if ((device_id <= 0) || (device_id >= 64))
-      {
-        event_log_error (hashcat_ctx, "Invalid device_id %d specified.", device_id);
-
-        hcfree (devices);
-
-        return false;
-      }
-
-      devices_filter |= 1ULL << (device_id - 1);
-
-    } while ((next = strtok_r ((char *) NULL, ",", &saveptr)) != NULL);
-
-    hcfree (devices);
-  }
-  else
-  {
-    devices_filter = -1ULL;
-  }
-
-  *out = devices_filter;
-
-  return true;
-}
-
 static bool setup_opencl_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_device_types, cl_device_type *out)
 {
   cl_device_type opencl_device_types_filter = 0;
@@ -237,6 +237,77 @@ static bool setup_opencl_device_types_filter (hashcat_ctx_t *hashcat_ctx, const
   return true;
 }
 
+// Builds kernel_buf for the given device with "-Werror" and reports whether that build succeeded.
+// Returns false if the program cannot be created, fails to build, or cannot be released.
+static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, const char *kernel_buf)
+{
+  int CL_rc;
+  cl_program program;
+
+  CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, context, 1, &kernel_buf, NULL, &program);
+
+  if (CL_rc == -1) return false;
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  OCL_PTR *ocl = backend_ctx->ocl;
+
+  // LLVM seems to write an error message (if there's an error) directly to stderr
+  // and not (as it is supposed to) into the buffer for later request using clGetProgramBuildInfo(),
+  // so stderr is pointed at /dev/null for the build, but only if it can be restored afterwards
+
+  #ifndef DEBUG
+  #ifndef _WIN
+  fflush (stderr);
+  int bak = dup (2);
+  int tmp = open ("/dev/null", O_WRONLY);
+  if ((bak != -1) && (tmp != -1)) dup2 (tmp, 2);
+  if (tmp != -1) close (tmp);
+  #endif
+  #endif
+
+  CL_rc = ocl->clBuildProgram (program, 1, &device, "-Werror", NULL, NULL); // do not use the wrapper to avoid the error message
+
+  #ifndef DEBUG
+  #ifndef _WIN
+  fflush (stderr);
+  if (bak != -1)
+  {
+    dup2 (bak, 2);
+    close (bak);
+  }
+  #endif
+  #endif
+
+  if (CL_rc != CL_SUCCESS)
+  {
+    #if defined (DEBUG)
+    event_log_error (hashcat_ctx, "clBuildProgram(): %s", val2cstr_cl (CL_rc));
+
+    size_t build_log_size = 0;
+
+    hc_clGetProgramBuildInfo (hashcat_ctx, program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+
+    char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+    hc_clGetProgramBuildInfo (hashcat_ctx, program, device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+
+    build_log[build_log_size] = 0;
+
+    puts (build_log);
+
+    hcfree (build_log);
+    #endif
+
+    hc_clReleaseProgram (hashcat_ctx, program);
+    return false;
+  }
+
+  CL_rc = hc_clReleaseProgram (hashcat_ctx, program);
+  if (CL_rc == -1) return false;
+
+  return true;
+}
+
 static bool read_kernel_binary (hashcat_ctx_t *hashcat_ctx, const char *kernel_file, size_t *kernel_lengths, char **kernel_sources, const bool force_recompile)
 {
   FILE *fp = fopen (kernel_file, "rb");
@@ -330,77 +401,6 @@ static bool write_kernel_binary (hashcat_ctx_t *hashcat_ctx, char *kernel_file,
   return true;
 }
 
-static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, const char *kernel_buf)
-{
-  int CL_rc;
-
-  cl_program program;
-
-  CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, context, 1, &kernel_buf, NULL, &program);
-
-  if (CL_rc == -1) return false;
-
-  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
-  OCL_PTR *ocl = backend_ctx->ocl;
-
-  // LLVM seems to write an error message (if there's an error) directly to stderr
-  // and not (as supposted to) into buffer for later request using clGetProgramBuildInfo()
-
-  #ifndef DEBUG
-  #ifndef _WIN
-  fflush (stderr);
-  int bak = dup (2);
-  int tmp = open ("/dev/null", O_WRONLY);
-  dup2 (tmp, 2);
-  close (tmp);
-  #endif
-  #endif
-
-  CL_rc = ocl->clBuildProgram (program, 1, &device, "-Werror", NULL, NULL); // do not use the wrapper to avoid the error message
-
-  #ifndef DEBUG
-  #ifndef _WIN
-  fflush (stderr);
-  dup2 (bak, 2);
-  close (bak);
-  #endif
-  #endif
-
-  if (CL_rc != CL_SUCCESS)
-  {
-    #if defined (DEBUG)
-
-    event_log_error (hashcat_ctx, "clBuildProgram(): %s", val2cstr_cl (CL_rc));
-
-    size_t build_log_size = 0;
-
-    hc_clGetProgramBuildInfo (hashcat_ctx, program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
-
-    char *build_log = (char *) hcmalloc (build_log_size + 1);
-
-    hc_clGetProgramBuildInfo (hashcat_ctx, program, device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
-
-    build_log[build_log_size] = 0;
-
-    puts (build_log);
-
-    hcfree (build_log);
-
-    #endif
-
-    hc_clReleaseProgram (hashcat_ctx, program);
-
-    return false;
-  }
-
-  CL_rc = hc_clReleaseProgram (hashcat_ctx, program);
-
-  if (CL_rc == -1) return false;
-
-  return true;
-}
-
 void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file)
 {
   if (opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
@@ -3464,13 +3464,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     cuda_close (hashcat_ctx);
   }
 
-  const int rc_cuInit = hc_cuInit (hashcat_ctx, 0);
-
-  if (rc_cuInit == -1)
-  {
-    cuda_close (hashcat_ctx);
-  }
-
   /**
    * Load and map NVRTC library calls
    */
@@ -3490,7 +3483,19 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * Check if both CUDA and NVRTC were load successful
    */
 
-  if ((rc_cuda_init == -1) || (rc_nvrtc_init == -1))
+  if ((rc_cuda_init == 0) && (rc_nvrtc_init == 0))
+  {
+    // cuda version
+
+    int cuda_driver_version = 0;
+
+    const int rc_cuDriverGetVersion = hc_cuDriverGetVersion (hashcat_ctx, &cuda_driver_version);
+
+    if (rc_cuDriverGetVersion == -1) return -1;
+
+    backend_ctx->cuda_driver_version = cuda_driver_version;
+  }
+  else
   {
     cuda_close  (hashcat_ctx);
     nvrtc_close (hashcat_ctx);
@@ -3517,108 +3522,14 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if ((rc_cuda_init == -1) && (rc_ocl_init == -1))
   {
-    return -1;
-  }
+    event_log_error (hashcat_ctx, "ATTENTION! No CUDA or OpenCL installation found.");
 
-  /**
-   * Some permission pre-check, because AMDGPU-PRO Driver crashes if the user has no permission to do this
-   */
-
-  const int rc_ocl_check = ocl_check_dri (hashcat_ctx);
-
-  if (rc_ocl_check == -1) return -1;
-
-  /**
-   * OpenCL platform selection
-   */
-
-  u64 opencl_platforms_filter;
-
-  const bool rc_platforms_filter = setup_opencl_platforms_filter (hashcat_ctx, user_options->opencl_platforms, &opencl_platforms_filter);
-
-  if (rc_platforms_filter == false) return -1;
-
-  backend_ctx->opencl_platforms_filter = opencl_platforms_filter;
-
-  /**
-   * OpenCL device selection
-   */
-
-  u64 devices_filter;
-
-  const bool rc_devices_filter = setup_devices_filter (hashcat_ctx, user_options->opencl_devices, &devices_filter);
-
-  if (rc_devices_filter == false) return -1;
-
-  backend_ctx->devices_filter = devices_filter;
-
-  /**
-   * OpenCL device type selection
-   */
-
-  cl_device_type opencl_device_types_filter;
-
-  const bool rc_opencl_device_types_filter = setup_opencl_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &opencl_device_types_filter);
-
-  if (rc_opencl_device_types_filter == false) return -1;
-
-  backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
-
-  /**
-   * Backend structures
-   */
-
-  #define FREE_BACKEND_CTX_ON_ERROR       \
-  {                                       \
-    hcfree (backend_device_from_cuda);    \
-    hcfree (backend_device_to_cuda);      \
-    hcfree (backend_device_from_opencl);  \
-    hcfree (backend_device_to_opencl);    \
-    hcfree (platforms_vendor);            \
-    hcfree (platforms_name);              \
-    hcfree (platforms_version);           \
-    hcfree (platforms_skipped);           \
-    hcfree (platforms);                   \
-    hcfree (platform_devices);            \
-  }
-
-  int *backend_device_from_cuda   = (int *) hccalloc (DEVICES_MAX, sizeof (int));
-  int *backend_device_to_cuda     = (int *) hccalloc (DEVICES_MAX, sizeof (int));
-  int *backend_device_from_opencl = (int *) hccalloc (DEVICES_MAX, sizeof (int));
-  int *backend_device_to_opencl   = (int *) hccalloc (DEVICES_MAX, sizeof (int));
-
-  /**
-   * OpenCL platforms: detect
-   */
-
-  char          **platforms_vendor      = (char **) hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
-  char          **platforms_name        = (char **) hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
-  char          **platforms_version     = (char **) hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
-  bool           *platforms_skipped     = (bool *)  hccalloc (CL_PLATFORMS_MAX, sizeof (bool));
-  cl_uint         platforms_cnt         = 0;
-  cl_platform_id *platforms             = (cl_platform_id *) hccalloc (CL_PLATFORMS_MAX, sizeof (cl_platform_id));
-  cl_uint         platform_devices_cnt  = 0;
-  cl_device_id   *platform_devices      = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
-
-  int CL_rc = hc_clGetPlatformIDs (hashcat_ctx, CL_PLATFORMS_MAX, platforms, &platforms_cnt);
-
-  if (CL_rc == -1)
-  {
-    FREE_BACKEND_CTX_ON_ERROR;
-
-    return -1;
-  }
-
-  if (platforms_cnt == 0)
-  {
-    event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible platform found.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the OpenCL runtime installation.");
+    event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation.");
     event_log_warning (hashcat_ctx, NULL);
 
     #if defined (__linux__)
     event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"AMDGPU-PRO Driver\" (16.40 or later)");
+    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
     #elif defined (_WIN)
     event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
     event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
@@ -3640,103 +3551,251 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
 
-    FREE_BACKEND_CTX_ON_ERROR;
+    return -1;
+  }
+
+  /**
+   * Permission pre-check, because the AMDGPU-PRO driver crashes if the user lacks the required permission
+   */
+
+  const int rc_ocl_check = ocl_check_dri (hashcat_ctx);
+
+  if (rc_ocl_check == -1) return -1;
+
+  /**
+   * OpenCL device selection
+   */
+
+  u64 devices_filter;
+
+  const bool rc_devices_filter = setup_devices_filter (hashcat_ctx, user_options->opencl_devices, &devices_filter);
+
+  if (rc_devices_filter == false) return -1;
+
+  backend_ctx->devices_filter = devices_filter;
+
+  /**
+   * CUDA API: init
+   */
+
+  if (backend_ctx->cuda)
+  {
+    const int rc_cuInit = hc_cuInit (hashcat_ctx, 0);
+
+    if (rc_cuInit == -1)
+    {
+      cuda_close (hashcat_ctx);
+    }
+  }
+
+  /**
+   * OpenCL API: init
+   */
+
+  if (backend_ctx->ocl)
+  {
+    #define FREE_OPENCL_CTX_ON_ERROR          \
+    {                                         \
+      hcfree (opencl_platforms);              \
+      hcfree (opencl_platforms_devices);      \
+      hcfree (opencl_platforms_devices_cnt);  \
+      hcfree (opencl_platforms_name);         \
+      hcfree (opencl_platforms_skipped);      \
+      hcfree (opencl_platforms_vendor);       \
+      hcfree (opencl_platforms_version);      \
+    }
+
+    cl_platform_id *opencl_platforms             = (cl_platform_id *) hccalloc (CL_PLATFORMS_MAX, sizeof (cl_platform_id));
+    cl_uint         opencl_platforms_cnt         = 0;
+    cl_device_id  **opencl_platforms_devices     = (cl_device_id **)  hccalloc (CL_PLATFORMS_MAX, sizeof (cl_device_id *));
+    cl_uint        *opencl_platforms_devices_cnt = (cl_uint *)        hccalloc (CL_PLATFORMS_MAX, sizeof (cl_uint));
+    char          **opencl_platforms_name        = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
+    bool           *opencl_platforms_skipped     = (bool *)           hccalloc (CL_PLATFORMS_MAX, sizeof (bool));
+    char          **opencl_platforms_vendor      = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
+    char          **opencl_platforms_version     = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
+
+    int CL_rc = hc_clGetPlatformIDs (hashcat_ctx, CL_PLATFORMS_MAX, opencl_platforms, &opencl_platforms_cnt);
+
+    if (CL_rc == -1)
+    {
+      opencl_platforms_cnt = 0;
+
+      FREE_OPENCL_CTX_ON_ERROR;
+
+      ocl_close (hashcat_ctx);
+    }
+
+    if (opencl_platforms_cnt)
+    {
+      /**
+       * OpenCL platform selection
+       */
+
+      u64 opencl_platforms_filter;
+
+      const bool rc_platforms_filter = setup_opencl_platforms_filter (hashcat_ctx, user_options->opencl_platforms, &opencl_platforms_filter);
+
+      if (rc_platforms_filter == false) return -1;
+
+      backend_ctx->opencl_platforms_filter = opencl_platforms_filter;
+
+      if (opencl_platforms_filter != (u64) -1)
+      {
+        u64 opencl_platform_cnt_mask = ~(((u64) -1 >> opencl_platforms_cnt) << opencl_platforms_cnt);
+
+        if (opencl_platforms_filter > opencl_platform_cnt_mask)
+        {
+          event_log_error (hashcat_ctx, "An invalid platform was specified using the --opencl-platforms parameter.");
+          event_log_error (hashcat_ctx, "The specified platform was higher than the number of available platforms (%u).", opencl_platforms_cnt);
+
+          FREE_OPENCL_CTX_ON_ERROR;
+
+          return -1;
+        }
+      }
+
+      /**
+       * OpenCL device type selection
+       */
+
+      cl_device_type opencl_device_types_filter;
+
+      const bool rc_opencl_device_types_filter = setup_opencl_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &opencl_device_types_filter);
+
+      if (rc_opencl_device_types_filter == false) return -1;
+
+      backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
+
+      if (user_options->opencl_device_types == NULL)
+      {
+        /**
+         * OpenCL device types:
+         *   In case the user did not specify --opencl-device-types and runs hashcat on a system with only a CPU, he probably wants to use that CPU.
+         */
+
+        cl_device_type opencl_device_types_all = 0;
+
+        for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
+        {
+          cl_platform_id opencl_platform = opencl_platforms[opencl_platforms_idx];
+
+          cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
+
+          cl_uint opencl_platform_devices_cnt = 0;
+
+          CL_rc = hc_clGetDeviceIDs (hashcat_ctx, opencl_platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, opencl_platform_devices, &opencl_platform_devices_cnt);
+
+          if (CL_rc == -1)
+          {
+            hcfree (opencl_platform_devices);
+
+            FREE_OPENCL_CTX_ON_ERROR;
+
+            continue;
+          }
+
+          if ((opencl_platforms_filter & (1ULL << opencl_platforms_idx)) == 0)
+          {
+            hcfree (opencl_platform_devices);
+
+            continue;
+          }
+
+          for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
+          {
+            cl_device_id opencl_device = opencl_platform_devices[opencl_platform_devices_idx];
+
+            cl_device_type opencl_device_type;
+
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL);
+
+            if (CL_rc == -1)
+            {
+              FREE_OPENCL_CTX_ON_ERROR;
+
+              return -1;
+            }
+
+            opencl_device_types_all |= opencl_device_type;
+          }
+
+          hcfree (opencl_platform_devices);
+        }
+
+        // If no GPU or accelerator device type was found, automatically enable CPU device type support, since it's disabled by default.
+
+        if ((opencl_device_types_all & (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR)) == 0)
+        {
+          opencl_device_types_filter |= CL_DEVICE_TYPE_CPU;
+        }
+
+        // In another case, when the user uses --stdout, using CPU devices is much faster to set up
+        // If we have a CPU device, force it to be used
+
+        if (user_options->stdout_flag == true)
+        {
+          if (opencl_device_types_all & CL_DEVICE_TYPE_CPU)
+          {
+            opencl_device_types_filter = CL_DEVICE_TYPE_CPU;
+          }
+        }
+
+        backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
+      }
+    }
+
+    backend_ctx->opencl_platforms             = opencl_platforms;
+    backend_ctx->opencl_platforms_cnt         = opencl_platforms_cnt;
+    backend_ctx->opencl_platforms_devices     = opencl_platforms_devices;
+    backend_ctx->opencl_platforms_devices_cnt = opencl_platforms_devices_cnt;
+    backend_ctx->opencl_platforms_name        = opencl_platforms_name;
+    backend_ctx->opencl_platforms_skipped     = opencl_platforms_skipped;
+    backend_ctx->opencl_platforms_vendor      = opencl_platforms_vendor;
+    backend_ctx->opencl_platforms_version     = opencl_platforms_version;
+
+    #undef FREE_OPENCL_CTX_ON_ERROR
+  }
+
+  /**
+   * Final checks
+   */
+
+  if ((backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL))
+  {
+    event_log_error (hashcat_ctx, "ATTENTION! No CUDA-compatible or OpenCL-compatible platform found.");
+
+    event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation.");
+    event_log_warning (hashcat_ctx, NULL);
+
+    #if defined (__linux__)
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
+    #elif defined (_WIN)
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
+    #endif
+
+    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
+
+    #if defined (__linux__)
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
+    #elif defined (_WIN)
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
+    #endif
+
+    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
+    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
+    event_log_warning (hashcat_ctx, NULL);
 
     return -1;
   }
 
-  if (opencl_platforms_filter != (u64) -1)
-  {
-    u64 platform_cnt_mask = ~(((u64) -1 >> platforms_cnt) << platforms_cnt);
-
-    if (opencl_platforms_filter > platform_cnt_mask)
-    {
-      event_log_error (hashcat_ctx, "An invalid platform was specified using the --opencl-platforms parameter.");
-      event_log_error (hashcat_ctx, "The specified platform was higher than the number of available platforms (%u).", platforms_cnt);
-
-      FREE_BACKEND_CTX_ON_ERROR;
-
-      return -1;
-    }
-  }
-
-  if (user_options->opencl_device_types == NULL)
-  {
-    /**
-     * OpenCL device types:
-     *   In case the user did not specify --opencl-device-types and the user runs hashcat in a system with only a CPU only he probably want to use that CPU.
-     */
-
-    cl_device_type device_types_all = 0;
-
-    for (u32 platform_id = 0; platform_id < platforms_cnt; platform_id++)
-    {
-      if ((opencl_platforms_filter & (1ULL << platform_id)) == 0) continue;
-
-      cl_platform_id platform = platforms[platform_id];
-
-      CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
-
-      if (CL_rc == -1) continue;
-
-      for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
-      {
-        cl_device_id device = platform_devices[platform_devices_id];
-
-        cl_device_type device_type;
-
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device, CL_DEVICE_TYPE, sizeof (device_type), &device_type, NULL);
-
-        if (CL_rc == -1)
-        {
-          FREE_BACKEND_CTX_ON_ERROR;
-
-          return -1;
-        }
-
-        device_types_all |= device_type;
-      }
-    }
-
-    // In such a case, automatically enable cpu_md5CPU device type support, since it's disabled by default.
-
-    if ((device_types_all & (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR)) == 0)
-    {
-      opencl_device_types_filter |= CL_DEVICE_TYPE_CPU;
-    }
-
-    // In another case, when the user uses --stdout, using CPU devices is much faster to setup
-    // If we have a CPU device, force it to be used
-
-    if (user_options->stdout_flag == true)
-    {
-      if (device_types_all & CL_DEVICE_TYPE_CPU)
-      {
-        opencl_device_types_filter = CL_DEVICE_TYPE_CPU;
-      }
-    }
-
-    backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
-  }
-
   backend_ctx->enabled = true;
 
-  backend_ctx->backend_device_from_cuda   = backend_device_from_cuda;
-  backend_ctx->backend_device_to_cuda     = backend_device_to_cuda;
-  backend_ctx->backend_device_from_opencl = backend_device_from_opencl;
-  backend_ctx->backend_device_to_opencl   = backend_device_to_opencl;
-
-  backend_ctx->platforms_vendor      = platforms_vendor;
-  backend_ctx->platforms_name        = platforms_name;
-  backend_ctx->platforms_version     = platforms_version;
-  backend_ctx->platforms_skipped     = platforms_skipped;
-  backend_ctx->platforms_cnt         = platforms_cnt;
-  backend_ctx->platforms             = platforms;
-  backend_ctx->platform_devices_cnt  = platform_devices_cnt;
-  backend_ctx->platform_devices      = platform_devices;
-
-  #undef FREE_BACKEND_CTX_ON_ERROR
-
   return 0;
 }
 
@@ -3752,17 +3811,13 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 
   hcfree (backend_ctx->devices_param);
 
-  hcfree (backend_ctx->backend_device_from_cuda);
-  hcfree (backend_ctx->backend_device_to_cuda);
-  hcfree (backend_ctx->backend_device_from_opencl);
-  hcfree (backend_ctx->backend_device_to_opencl);
-
-  hcfree (backend_ctx->platforms);
-  hcfree (backend_ctx->platform_devices);
-  hcfree (backend_ctx->platforms_vendor);
-  hcfree (backend_ctx->platforms_name);
-  hcfree (backend_ctx->platforms_version);
-  hcfree (backend_ctx->platforms_skipped);
+  hcfree (backend_ctx->opencl_platforms);
+  hcfree (backend_ctx->opencl_platforms_devices);
+  hcfree (backend_ctx->opencl_platforms_devices_cnt);
+  hcfree (backend_ctx->opencl_platforms_name);
+  hcfree (backend_ctx->opencl_platforms_skipped);
+  hcfree (backend_ctx->opencl_platforms_vendor);
+  hcfree (backend_ctx->opencl_platforms_version);
 
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
@@ -3781,26 +3836,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   bool need_nvapi   = false;
   bool need_sysfs   = false;
 
-  int backend_devices_idx    = 0;
-  int backend_devices_cnt    = 0;
-  int backend_devices_active = 0;
+  int backend_devices_idx = 0;
+
+  int cuda_devices_cnt    = 0;
+  int cuda_devices_active = 0;
 
   if (backend_ctx->cuda)
   {
-    // cuda version
-
-    int cuda_driver_version = 0;
-
-    const int rc_cuDriverGetVersion = hc_cuDriverGetVersion (hashcat_ctx, &cuda_driver_version);
-
-    if (rc_cuDriverGetVersion == -1) return -1;
-
-    backend_ctx->cuda_driver_version = cuda_driver_version;
-
     // device count
 
-    int cuda_devices_cnt = 0;
-
     const int rc_cuDeviceGetCount = hc_cuDeviceGetCount (hashcat_ctx, &cuda_devices_cnt);
 
     if (rc_cuDeviceGetCount == -1)
@@ -3810,16 +3854,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     backend_ctx->cuda_devices_cnt = cuda_devices_cnt;
 
-    backend_devices_cnt += cuda_devices_cnt;
-
     // device specific
 
     for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++, backend_devices_idx++)
     {
+      const u32 device_id = backend_devices_idx;
+
       hc_device_param_t *device_param = &devices_param[backend_devices_idx];
 
-      backend_ctx->backend_device_from_cuda[cuda_devices_idx]  = backend_devices_idx;
-      backend_ctx->backend_device_to_cuda[backend_devices_idx] = cuda_devices_idx;
+      device_param->device_id = device_id;
+
+      backend_ctx->backend_device_from_cuda[cuda_devices_idx] = backend_devices_idx;
 
       CUdevice cuda_device;
 
@@ -3847,7 +3892,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_processors
 
-      int device_processors;
+      int device_processors = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_processors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cuda_device);
 
@@ -3857,7 +3902,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_global_mem, device_maxmem_alloc, device_available_mem
 
-      size_t bytes;
+      size_t bytes = 0;
 
       CU_rc = hc_cuDeviceTotalMem (hashcat_ctx, &bytes, cuda_device);
 
@@ -3887,7 +3932,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_maxworkgroup_size
 
-      int device_maxworkgroup_size;
+      int device_maxworkgroup_size = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuda_device);
 
@@ -3897,7 +3942,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // max_clock_frequency
 
-      int device_maxclock_frequency;
+      int device_maxclock_frequency = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, cuda_device);
 
@@ -3907,8 +3952,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // pcie_bus, pcie_device, pcie_function
 
-      int pci_bus_id_nv;  // is cl_uint the right type for them??
-      int pci_slot_id_nv;
+      int pci_bus_id_nv  = 0;
+      int pci_slot_id_nv = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cuda_device);
 
@@ -3924,7 +3969,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // kernel_exec_timeout
 
-      int kernel_exec_timeout;
+      int kernel_exec_timeout = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, cuda_device);
 
@@ -3934,7 +3979,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // max_shared_memory_per_block
 
-      int max_shared_memory_per_block;
+      int max_shared_memory_per_block = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, cuda_device);
 
@@ -3949,7 +3994,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_max_constant_buffer_size
 
-      int device_max_constant_buffer_size;
+      int device_max_constant_buffer_size = 0;
 
       CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, cuda_device);
 
@@ -3971,16 +4016,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       //
 
 
-
+      device_param->skipped = true; // while developing
     }
   }
 
-  backend_ctx->backend_devices_cnt    = backend_devices_cnt;
-  backend_ctx->backend_devices_active = backend_devices_active;
+  backend_ctx->cuda_devices_cnt     = cuda_devices_cnt;
+  backend_ctx->cuda_devices_active  = cuda_devices_active;
 
-  u32 devices_cnt = 0;
-
-  u32 devices_active = 0;
+  int opencl_devices_cnt    = 0;
+  int opencl_devices_active = 0;
 
   if (backend_ctx->ocl)
   {
@@ -3988,128 +4032,115 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      * OpenCL devices: simply push all devices from all platforms into the same device array
      */
 
-    cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
-    cl_platform_id *platforms             = backend_ctx->platforms;
-    cl_uint         platform_devices_cnt  = backend_ctx->platform_devices_cnt;
-    cl_device_id   *platform_devices      = backend_ctx->platform_devices;
+    cl_platform_id *opencl_platforms             = backend_ctx->opencl_platforms;
+    cl_uint         opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
+    cl_device_id  **opencl_platforms_devices     = backend_ctx->opencl_platforms_devices;
+    cl_uint        *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
+    char          **opencl_platforms_name        = backend_ctx->opencl_platforms_name;
+    bool           *opencl_platforms_skipped     = backend_ctx->opencl_platforms_skipped;
+    char          **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
+    char          **opencl_platforms_version     = backend_ctx->opencl_platforms_version;
 
-    for (u32 platform_id = 0; platform_id < platforms_cnt; platform_id++)
+    for (u32 opencl_platform_idx = 0; opencl_platform_idx < opencl_platforms_cnt; opencl_platform_idx++)
     {
       size_t param_value_size = 0;
 
-      cl_platform_id platform = platforms[platform_id];
+      cl_platform_id opencl_platform = opencl_platforms[opencl_platform_idx];
 
       // platform vendor
 
       int CL_rc;
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      char *platform_vendor = (char *) hcmalloc (param_value_size);
+      char *opencl_platform_vendor = (char *) hcmalloc (param_value_size);
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VENDOR, param_value_size, platform_vendor, NULL);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, param_value_size, opencl_platform_vendor, NULL);
 
       if (CL_rc == -1) return -1;
 
-      backend_ctx->platforms_vendor[platform_id] = platform_vendor;
+      opencl_platforms_vendor[opencl_platform_idx] = opencl_platform_vendor;
 
       // platform name
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      char *platform_name = (char *) hcmalloc (param_value_size);
+      char *opencl_platform_name = (char *) hcmalloc (param_value_size);
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_NAME, param_value_size, platform_name, NULL);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, param_value_size, opencl_platform_name, NULL);
 
       if (CL_rc == -1) return -1;
 
-      backend_ctx->platforms_name[platform_id] = platform_name;
+      opencl_platforms_name[opencl_platform_idx] = opencl_platform_name;
 
       // platform version
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
 
       if (CL_rc == -1) return -1;
 
-      char *platform_version = (char *) hcmalloc (param_value_size);
+      char *opencl_platform_version = (char *) hcmalloc (param_value_size);
 
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, platform, CL_PLATFORM_VERSION, param_value_size, platform_version, NULL);
+      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, param_value_size, opencl_platform_version, NULL);
 
       if (CL_rc == -1) return -1;
 
-      backend_ctx->platforms_version[platform_id] = platform_version;
+      opencl_platforms_version[opencl_platform_idx] = opencl_platform_version;
 
       // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
       // this causes trouble with vendor id based macros
       // we'll assign generic to those without special optimization available
 
-      cl_uint platform_vendor_id = 0;
+      cl_uint opencl_platform_vendor_id = 0;
 
-      if (strcmp (platform_vendor, CL_VENDOR_AMD1) == 0)
+      if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD1) == 0)
       {
-        platform_vendor_id = VENDOR_ID_AMD;
+        opencl_platform_vendor_id = VENDOR_ID_AMD;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_AMD2) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD2) == 0)
       {
-        platform_vendor_id = VENDOR_ID_AMD;
+        opencl_platform_vendor_id = VENDOR_ID_AMD;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
       {
-        platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+        opencl_platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_APPLE) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_APPLE) == 0)
       {
-        platform_vendor_id = VENDOR_ID_APPLE;
+        opencl_platform_vendor_id = VENDOR_ID_APPLE;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
       {
-        platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        opencl_platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
       {
-        platform_vendor_id = VENDOR_ID_INTEL_SDK;
+        opencl_platform_vendor_id = VENDOR_ID_INTEL_SDK;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_MESA) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_MESA) == 0)
       {
-        platform_vendor_id = VENDOR_ID_MESA;
+        opencl_platform_vendor_id = VENDOR_ID_MESA;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_NV) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_NV) == 0)
       {
-        platform_vendor_id = VENDOR_ID_NV;
+        opencl_platform_vendor_id = VENDOR_ID_NV;
       }
-      else if (strcmp (platform_vendor, CL_VENDOR_POCL) == 0)
+      else if (strcmp (opencl_platform_vendor, CL_VENDOR_POCL) == 0)
       {
-        platform_vendor_id = VENDOR_ID_POCL;
+        opencl_platform_vendor_id = VENDOR_ID_POCL;
       }
       else
       {
-        platform_vendor_id = VENDOR_ID_GENERIC;
+        opencl_platform_vendor_id = VENDOR_ID_GENERIC;
       }
 
-      bool platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << platform_id)) == 0);
-
-      CL_rc = hc_clGetDeviceIDs (hashcat_ctx, platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, platform_devices, &platform_devices_cnt);
-
-      if (CL_rc == -1)
-      {
-        //event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
-
-        //return -1;
-
-        platform_skipped = true;
-      }
-
-      backend_ctx->platforms_skipped[platform_id] = platform_skipped;
-
-      if (platform_skipped == true) continue;
-
       if (user_options->force == false)
       {
-        if (platform_vendor_id == VENDOR_ID_MESA)
+        if (opencl_platform_vendor_id == VENDOR_ID_MESA)
         {
           event_log_error (hashcat_ctx, "Mesa (Gallium) OpenCL platform detected!");
 
@@ -4123,29 +4154,56 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         }
       }
 
-      hc_device_param_t *devices_param = backend_ctx->devices_param;
+      bool opencl_platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << opencl_platform_idx)) == 0);
 
-      for (u32 platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
+      cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
+
+      cl_uint opencl_platform_devices_cnt = 0;
+
+      CL_rc = hc_clGetDeviceIDs (hashcat_ctx, opencl_platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, opencl_platform_devices, &opencl_platform_devices_cnt);
+
+      if (CL_rc == -1)
       {
-        const u32 device_id = devices_cnt;
+        //event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
+
+        //return -1;
+
+        opencl_platform_skipped = true;
+      }
+
+      opencl_platforms_devices[opencl_platform_idx] = opencl_platform_devices;
+
+      opencl_platforms_devices_cnt[opencl_platform_idx] = opencl_platform_devices_cnt;
+
+      opencl_platforms_skipped[opencl_platform_idx] = opencl_platform_skipped;
+
+      //if (opencl_platform_skipped == true) continue;
+
+      for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++)
+      {
+        const u32 device_id = backend_devices_idx;
 
         hc_device_param_t *device_param = &devices_param[device_id];
 
-        device_param->platform_vendor_id = platform_vendor_id;
-
-        device_param->device = platform_devices[platform_devices_id];
-
         device_param->device_id = device_id;
 
-        device_param->platform_devices_id = platform_devices_id;
+        backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx;
 
-        device_param->platform = platform;
+        backend_ctx->backend_device_from_opencl_platform[opencl_platform_idx][opencl_platform_devices_idx] = backend_devices_idx;
+
+        device_param->opencl_platform_vendor_id = opencl_platform_vendor_id;
+
+        device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx];
+
+        device_param->opencl_platform_devices_id = opencl_platform_devices_idx;
+
+        device_param->opencl_platform = opencl_platform;
 
         // opencl_device_type
 
         cl_device_type opencl_device_type;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TYPE, sizeof (opencl_device_type), &opencl_device_type, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4155,13 +4213,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // device_name
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, 0, NULL, &param_value_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, 0, NULL, &param_value_size);
 
         if (CL_rc == -1) return -1;
 
         char *device_name = (char *) hcmalloc (param_value_size);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NAME, param_value_size, device_name, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NAME, param_value_size, device_name, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4173,108 +4231,108 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // device_vendor
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, 0, NULL, &param_value_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, 0, NULL, &param_value_size);
 
         if (CL_rc == -1) return -1;
 
-        char *device_vendor = (char *) hcmalloc (param_value_size);
+        char *opencl_device_vendor = (char *) hcmalloc (param_value_size);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VENDOR, param_value_size, device_vendor, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VENDOR, param_value_size, opencl_device_vendor, NULL);
 
         if (CL_rc == -1) return -1;
 
-        device_param->device_vendor = device_vendor;
+        device_param->opencl_device_vendor = opencl_device_vendor;
 
-        cl_uint device_vendor_id = 0;
+        cl_uint opencl_device_vendor_id = 0;
 
-        if (strcmp (device_vendor, CL_VENDOR_AMD1) == 0)
+        if (strcmp (opencl_device_vendor, CL_VENDOR_AMD1) == 0)
         {
-          device_vendor_id = VENDOR_ID_AMD;
+          opencl_device_vendor_id = VENDOR_ID_AMD;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_AMD2) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD2) == 0)
         {
-          device_vendor_id = VENDOR_ID_AMD;
+          opencl_device_vendor_id = VENDOR_ID_AMD;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
         {
-          device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+          opencl_device_vendor_id = VENDOR_ID_AMD_USE_INTEL;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_APPLE) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE) == 0)
         {
-          device_vendor_id = VENDOR_ID_APPLE;
+          opencl_device_vendor_id = VENDOR_ID_APPLE;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_AMD) == 0)
         {
-          device_vendor_id = VENDOR_ID_AMD;
+          opencl_device_vendor_id = VENDOR_ID_AMD;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_NV) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_NV) == 0)
         {
-          device_vendor_id = VENDOR_ID_NV;
+          opencl_device_vendor_id = VENDOR_ID_NV;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_APPLE_USE_INTEL) == 0)
         {
-          device_vendor_id = VENDOR_ID_INTEL_SDK;
+          opencl_device_vendor_id = VENDOR_ID_INTEL_SDK;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
         {
-          device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+          opencl_device_vendor_id = VENDOR_ID_INTEL_BEIGNET;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_INTEL_SDK) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_INTEL_SDK) == 0)
         {
-          device_vendor_id = VENDOR_ID_INTEL_SDK;
+          opencl_device_vendor_id = VENDOR_ID_INTEL_SDK;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_MESA) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_MESA) == 0)
         {
-          device_vendor_id = VENDOR_ID_MESA;
+          opencl_device_vendor_id = VENDOR_ID_MESA;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_NV) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_NV) == 0)
         {
-          device_vendor_id = VENDOR_ID_NV;
+          opencl_device_vendor_id = VENDOR_ID_NV;
         }
-        else if (strcmp (device_vendor, CL_VENDOR_POCL) == 0)
+        else if (strcmp (opencl_device_vendor, CL_VENDOR_POCL) == 0)
         {
-          device_vendor_id = VENDOR_ID_POCL;
+          opencl_device_vendor_id = VENDOR_ID_POCL;
         }
         else
         {
-          device_vendor_id = VENDOR_ID_GENERIC;
+          opencl_device_vendor_id = VENDOR_ID_GENERIC;
         }
 
-        device_param->device_vendor_id = device_vendor_id;
+        device_param->opencl_device_vendor_id = opencl_device_vendor_id;
 
         // device_version
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, 0, NULL, &param_value_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, 0, NULL, &param_value_size);
 
         if (CL_rc == -1) return -1;
 
-        char *device_version = (char *) hcmalloc (param_value_size);
+        char *opencl_device_version = (char *) hcmalloc (param_value_size);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_VERSION, param_value_size, device_version, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_VERSION, param_value_size, opencl_device_version, NULL);
 
         if (CL_rc == -1) return -1;
 
-        device_param->device_version = device_version;
+        device_param->opencl_device_version = opencl_device_version;
 
-        // device_opencl_version
+        // opencl_device_c_version
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &param_value_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &param_value_size);
 
         if (CL_rc == -1) return -1;
 
-        char *device_opencl_version = (char *) hcmalloc (param_value_size);
+        char *opencl_device_c_version = (char *) hcmalloc (param_value_size);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, device_opencl_version, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, opencl_device_c_version, NULL);
 
         if (CL_rc == -1) return -1;
 
-        device_param->device_opencl_version = device_opencl_version;
+        device_param->opencl_device_c_version = opencl_device_c_version;
 
         // max_compute_units
 
         cl_uint device_processors;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (device_processors), &device_processors, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4284,7 +4342,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_ulong device_global_mem;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (device_global_mem), &device_global_mem, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4296,7 +4354,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_ulong device_maxmem_alloc;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (device_maxmem_alloc), &device_maxmem_alloc, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4310,7 +4368,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         size_t device_maxworkgroup_size;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (device_maxworkgroup_size), &device_maxworkgroup_size, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4320,7 +4378,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_uint device_maxclock_frequency;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (device_maxclock_frequency), &device_maxclock_frequency, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4330,13 +4388,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_bool device_endian_little;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_ENDIAN_LITTLE, sizeof (device_endian_little), &device_endian_little, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (device_endian_little == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4345,13 +4403,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_bool device_available;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_AVAILABLE, sizeof (device_available), &device_available, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (device_available == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4360,13 +4418,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_bool device_compiler_available;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPILER_AVAILABLE, sizeof (device_compiler_available), &device_compiler_available, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (device_compiler_available == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4375,13 +4433,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_device_exec_capabilities device_execution_capabilities;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof (device_execution_capabilities), &device_execution_capabilities, NULL);
 
         if (CL_rc == -1) return -1;
 
         if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4390,26 +4448,26 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         size_t device_extensions_size;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, 0, NULL, &device_extensions_size);
 
         if (CL_rc == -1) return -1;
 
         char *device_extensions = hcmalloc (device_extensions_size + 1);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_EXTENSIONS, device_extensions_size, device_extensions, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (strstr (device_extensions, "base_atomics") == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
 
         if (strstr (device_extensions, "byte_addressable_store") == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4420,13 +4478,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_ulong device_max_constant_buffer_size;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (device_max_constant_buffer_size), &device_max_constant_buffer_size, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (device_max_constant_buffer_size < 65536)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4435,13 +4493,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_ulong device_local_mem_size;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (device_local_mem_size), &device_local_mem_size, NULL);
 
         if (CL_rc == -1) return -1;
 
         if (device_local_mem_size < 32768)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", backend_devices_idx + 1);
 
           device_param->skipped = true;
         }
@@ -4452,7 +4510,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_device_local_mem_type device_local_mem_type;
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -4466,11 +4524,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (opencl_device_type & CL_DEVICE_TYPE_CPU)
         {
-          if (device_param->device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
+          if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_INTEL)
           {
             if (user_options->force == false)
             {
-              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", device_id + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", backend_devices_idx + 1);
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
               device_param->skipped = true;
@@ -4486,11 +4544,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         #if !defined (__APPLE__)
         if (opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
-          if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->device_vendor_id == VENDOR_ID_INTEL_BEIGNET))
+          if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) || (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_BEIGNET))
           {
             if (user_options->force == false)
             {
-              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", backend_devices_idx + 1);
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             We are waiting for updated OpenCL drivers from Intel.");
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
@@ -4514,23 +4572,23 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // driver_version
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, 0, NULL, &param_value_size);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, 0, NULL, &param_value_size);
 
         if (CL_rc == -1) return -1;
 
-        char *driver_version = (char *) hcmalloc (param_value_size);
+        char *opencl_driver_version = (char *) hcmalloc (param_value_size);
 
-        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DRIVER_VERSION, param_value_size, driver_version, NULL);
+        CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DRIVER_VERSION, param_value_size, opencl_driver_version, NULL);
 
         if (CL_rc == -1) return -1;
 
-        device_param->driver_version = driver_version;
+        device_param->opencl_driver_version = opencl_driver_version;
 
         // vendor specific
 
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
-          if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
+          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
             need_adl = true;
 
@@ -4539,7 +4597,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             #endif
           }
 
-          if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
           {
             need_nvml = true;
 
@@ -4551,11 +4609,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
-          if ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->device_vendor_id == VENDOR_ID_AMD))
+          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
             cl_device_topology_amd amdtopo;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -4564,16 +4622,16 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             device_param->pcie_function = amdtopo.pcie.function;
           }
 
-          if ((device_param->platform_vendor_id == VENDOR_ID_NV) && (device_param->device_vendor_id == VENDOR_ID_NV))
+          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
           {
             cl_uint pci_bus_id_nv;  // is cl_uint the right type for them??
             cl_uint pci_slot_id_nv;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_BUS_ID_NV, sizeof (pci_bus_id_nv), &pci_bus_id_nv, NULL);
 
             if (CL_rc == -1) return -1;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof (pci_slot_id_nv), &pci_slot_id_nv, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -4584,11 +4642,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             int sm_minor = 0;
             int sm_major = 0;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL);
 
             if (CL_rc == -1) return -1;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -4597,7 +4655,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
             cl_uint kernel_exec_timeout = 0;
 
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof (kernel_exec_timeout), &kernel_exec_timeout, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -4619,7 +4677,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           {
             if (opencl_device_type & CL_DEVICE_TYPE_CPU)
             {
-              if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
+              if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK)
               {
                 bool intel_warn = false;
 
@@ -4630,7 +4688,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
                 int opencl_driver3 = 0;
                 int opencl_driver4 = 0;
 
-                const int res18 = sscanf (device_param->driver_version, "%u.%u.%u.%u", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4);
+                const int res18 = sscanf (device_param->opencl_driver_version, "%u.%u.%u.%u", &opencl_driver1, &opencl_driver2, &opencl_driver3, &opencl_driver4);
 
                 if (res18 == 4)
                 {
@@ -4643,7 +4701,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
                   float opencl_version = 0;
                   int   opencl_build   = 0;
 
-                  const int res16 = sscanf (device_param->device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build);
+                  const int res16 = sscanf (device_param->opencl_device_version, "OpenCL %f (Build %d)", &opencl_version, &opencl_build);
 
                   if (res16 == 2)
                   {
@@ -4653,7 +4711,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (intel_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
                   event_log_warning (hashcat_ctx, "See hashcat.net for officially supported NVIDIA drivers.");
@@ -4667,23 +4725,23 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             }
             else if (opencl_device_type & CL_DEVICE_TYPE_GPU)
             {
-              if (device_param->platform_vendor_id == VENDOR_ID_AMD)
+              if (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)
               {
                 bool amd_warn = true;
 
                 #if defined (__linux__)
                 // AMDGPU-PRO Driver 16.40 and higher
-                if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false;
+                if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 2117) amd_warn = false;
                 // AMDGPU-PRO Driver 16.50 is known to be broken
-                if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true;
+                if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2236) amd_warn = true;
                 // AMDGPU-PRO Driver 16.60 is known to be broken
-                if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true;
+                if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2264) amd_warn = true;
                 // AMDGPU-PRO Driver 17.10 is known to be broken
-                if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true;
+                if (strtoul (device_param->opencl_driver_version, NULL, 10) == 2348) amd_warn = true;
                 // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117
                 #elif defined (_WIN)
                 // AMD Radeon Software 14.9 and higher, should be updated to 15.12
-                if (strtoul (device_param->driver_version, NULL, 10) >= 1573) amd_warn = false;
+                if (strtoul (device_param->opencl_driver_version, NULL, 10) >= 1573) amd_warn = false;
                 #else
                 // we have no information about other os
                 if (amd_warn == true) amd_warn = false;
@@ -4691,7 +4749,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (amd_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported AMD driver.");
                   event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers.");
@@ -4703,14 +4761,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
                 }
               }
 
-              if (device_param->platform_vendor_id == VENDOR_ID_NV)
+              if (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)
               {
                 int nv_warn = true;
 
                 int version_maj = 0;
                 int version_min = 0;
 
-                const int r = sscanf (device_param->driver_version, "%d.%d", &version_maj, &version_min);
+                const int r = sscanf (device_param->opencl_driver_version, "%d.%d", &version_maj, &version_min);
 
                 if (r == 2)
                 {
@@ -4742,7 +4800,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (nv_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
                   event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers.");
@@ -4755,22 +4813,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (device_param->sm_major < 5)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", backend_devices_idx + 1, device_param->sm_major, device_param->sm_minor);
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
                 }
 
                 if (device_param->kernel_exec_timeout != 0)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", backend_devices_idx + 1);
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
                 }
               }
 
-              if ((strstr (device_param->device_opencl_version, "beignet")) || (strstr (device_param->device_version, "beignet")))
+              if ((strstr (device_param->opencl_device_c_version, "beignet")) || (strstr (device_param->opencl_device_version, "beignet")))
               {
-                event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", device_id + 1);
+                event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", backend_devices_idx + 1);
 
                 event_log_warning (hashcat_ctx, "The beignet driver has been marked as likely to fail kernel compilation.");
                 event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
@@ -4785,7 +4843,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
            * activate device
            */
 
-          devices_active++;
+          opencl_devices_active++;
         }
 
         /**
@@ -4797,10 +4855,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         cl_context_properties properties[3];
 
         properties[0] = CL_CONTEXT_PLATFORM;
-        properties[1] = (cl_context_properties) device_param->platform;
+        properties[1] = (cl_context_properties) device_param->opencl_platform;
         properties[2] = 0;
 
-        CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->device, NULL, NULL, &context);
+        CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
 
         if (CL_rc == -1) return -1;
 
@@ -4810,40 +4868,40 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_command_queue command_queue;
 
-        CL_rc = hc_clCreateCommandQueue (hashcat_ctx, context, device_param->device, 0, &command_queue);
+        CL_rc = hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue);
 
         if (CL_rc == -1) return -1;
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
         {
-          const bool has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vadd3 = has_vadd3;
 
-          const bool has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vbfe = has_vbfe;
 
-          const bool has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
+          const bool has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
 
           device_param->has_vperm = has_vperm;
         }
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
         {
-          const bool has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_bfe = has_bfe;
 
-          const bool has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_lop3 = has_lop3;
 
-          const bool has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
+          const bool has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
 
           device_param->has_mov64 = has_mov64;
 
-          const bool has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+          const bool has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
           device_param->has_prmt = has_prmt;
         }
@@ -4856,9 +4914,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
 
         #if defined (_WIN)
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_NV))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
         #else
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD)))
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)))
         #endif
         {
           // OK, so the problem here is the following:
@@ -4940,12 +4998,20 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // next please
 
-        devices_cnt++;
+
       }
     }
   }
 
-  if (devices_active == 0)
+  backend_ctx->opencl_devices_cnt     = opencl_devices_cnt;
+  backend_ctx->opencl_devices_active  = opencl_devices_active;
+
+  // all devices combined go into backend_* variables
+
+  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
+  backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
+
+  if (backend_ctx->backend_devices_active == 0) // same check as the replaced devices_active test: fail when no device was activated
   {
     event_log_error (hashcat_ctx, "No devices found/left.");
 
@@ -4956,28 +5022,25 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   if (backend_ctx->devices_filter != (u64) -1)
   {
-    const u64 devices_cnt_mask = ~(((u64) -1 >> devices_cnt) << devices_cnt);
+    const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
 
-    if (backend_ctx->devices_filter > devices_cnt_mask)
+    if (backend_ctx->devices_filter > backend_devices_cnt_mask)
     {
       event_log_error (hashcat_ctx, "An invalid device was specified using the --opencl-devices parameter.");
-      event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", devices_cnt);
+      event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
 
       return -1;
     }
   }
 
-  backend_ctx->target_msec     = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
+  backend_ctx->target_msec  = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
 
-  backend_ctx->devices_cnt     = devices_cnt;
-  backend_ctx->devices_active  = devices_active;
+  backend_ctx->need_adl     = need_adl;
+  backend_ctx->need_nvml    = need_nvml;
+  backend_ctx->need_nvapi   = need_nvapi;
+  backend_ctx->need_sysfs   = need_sysfs;
 
-  backend_ctx->need_adl        = need_adl;
-  backend_ctx->need_nvml       = need_nvml;
-  backend_ctx->need_nvapi      = need_nvapi;
-  backend_ctx->need_sysfs      = need_sysfs;
-
-  backend_ctx->comptime        = comptime;
+  backend_ctx->comptime     = comptime;
 
   return 0;
 }
@@ -4988,28 +5051,34 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  for (u32 platform_id = 0; platform_id < backend_ctx->platforms_cnt; platform_id++)
+  for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < backend_ctx->opencl_platforms_cnt; opencl_platforms_idx++)
   {
-    hcfree (backend_ctx->platforms_vendor[platform_id]);
-    hcfree (backend_ctx->platforms_name[platform_id]);
-    hcfree (backend_ctx->platforms_version[platform_id]);
+    hcfree (backend_ctx->opencl_platforms_devices[opencl_platforms_idx]);
+    hcfree (backend_ctx->opencl_platforms_name[opencl_platforms_idx]);
+    hcfree (backend_ctx->opencl_platforms_vendor[opencl_platforms_idx]);
+    hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]);
   }
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
     hcfree (device_param->device_name);
-    hcfree (device_param->device_version);
-    hcfree (device_param->driver_version);
-    hcfree (device_param->device_opencl_version);
-    hcfree (device_param->device_vendor);
+
+    hcfree (device_param->opencl_driver_version);
+    hcfree (device_param->opencl_device_version);
+    hcfree (device_param->opencl_device_c_version);
+    hcfree (device_param->opencl_device_vendor);
   }
 
-  backend_ctx->devices_cnt    = 0;
-  backend_ctx->devices_active = 0;
+  backend_ctx->backend_devices_cnt    = 0;
+  backend_ctx->backend_devices_active = 0;
+  backend_ctx->cuda_devices_cnt       = 0;
+  backend_ctx->cuda_devices_active    = 0;
+  backend_ctx->opencl_devices_cnt     = 0;
+  backend_ctx->opencl_devices_active  = 0;
 
   backend_ctx->need_adl    = false;
   backend_ctx->need_nvml   = false;
@@ -5019,10 +5088,17 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
 
 static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
 {
-  if (strcmp (src->device_name,    dst->device_name)    != 0) return false;
-  if (strcmp (src->device_vendor,  dst->device_vendor)  != 0) return false;
-  if (strcmp (src->device_version, dst->device_version) != 0) return false;
-  if (strcmp (src->driver_version, dst->driver_version) != 0) return false;
+  if (strcmp (src->device_name, dst->device_name) != 0) return false;
+
+  if (src->is_cuda   != dst->is_cuda)   return false;
+  if (src->is_opencl != dst->is_opencl) return false;
+
+  if (src->is_opencl == true) // the opencl_* strings compared below are OpenCL attributes, so gate on is_opencl (not is_cuda)
+  {
+    if (strcmp (src->opencl_device_vendor,  dst->opencl_device_vendor)  != 0) return false;
+    if (strcmp (src->opencl_device_version, dst->opencl_device_version) != 0) return false;
+    if (strcmp (src->opencl_driver_version, dst->opencl_driver_version) != 0) return false;
+  }
 
   if (src->device_processors         != dst->device_processors)         return false;
   if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
@@ -5046,17 +5122,17 @@ void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id_src = 0; device_id_src < backend_ctx->devices_cnt; device_id_src++)
+  for (int backend_devices_cnt_src = 0; backend_devices_cnt_src < backend_ctx->backend_devices_cnt; backend_devices_cnt_src++)
   {
-    hc_device_param_t *device_param_src = &backend_ctx->devices_param[device_id_src];
+    hc_device_param_t *device_param_src = &backend_ctx->devices_param[backend_devices_cnt_src];
 
     if (device_param_src->skipped == true) continue;
 
     if (device_param_src->skipped_warning == true) continue;
 
-    for (u32 device_id_dst = device_id_src; device_id_dst < backend_ctx->devices_cnt; device_id_dst++)
+    for (int backend_devices_cnt_dst = backend_devices_cnt_src + 1; backend_devices_cnt_dst < backend_ctx->backend_devices_cnt; backend_devices_cnt_dst++)
     {
-      hc_device_param_t *device_param_dst = &backend_ctx->devices_param[device_id_dst];
+      hc_device_param_t *device_param_dst = &backend_ctx->devices_param[backend_devices_cnt_dst];
 
       if (device_param_dst->skipped == true) continue;
 
@@ -5090,9 +5166,9 @@ void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
 
   u32 kernel_power_all = 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -5136,9 +5212,9 @@ void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -5184,7 +5260,7 @@ static int get_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   size_t work_group_size;
 
-  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL);
+  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -5192,7 +5268,7 @@ static int get_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   size_t compile_work_group_size[3];
 
-  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL);
+  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -5214,7 +5290,7 @@ static int get_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_dev
 
   size_t preferred_work_group_size_multiple;
 
-  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL);
+  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof (preferred_work_group_size_multiple), &preferred_work_group_size_multiple, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -5229,7 +5305,7 @@ static int get_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_para
 
   cl_ulong local_mem_size;
 
-  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL);
+  CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->opencl_device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof (local_mem_size), &local_mem_size, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -5376,7 +5452,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
   u32 hardware_power_all = 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
     int CL_rc = CL_SUCCESS;
 
@@ -5384,11 +5460,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * host buffer
      */
 
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
-    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_PRE, &device_id, sizeof (u32));
+    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int));
 
     /**
      * module depending checks
@@ -5402,7 +5478,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if ((unstable_warning == true) && (user_options->force == false))
       {
-        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known OpenCL/Driver issue (not a hashcat issue)", device_id + 1, hashconfig->hash_mode);
+        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known OpenCL/Driver issue (not a hashcat issue)", backend_devices_idx + 1, hashconfig->hash_mode);
         event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
         device_param->skipped_warning = true;
@@ -5434,13 +5510,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64)
         {
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
 
           if (CL_rc == -1) return -1;
         }
         else
         {
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
+          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -5615,10 +5691,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     cl_context_properties properties[3];
 
     properties[0] = CL_CONTEXT_PLATFORM;
-    properties[1] = (cl_context_properties) device_param->platform;
+    properties[1] = (cl_context_properties) device_param->opencl_platform;
     properties[2] = 0;
 
-    CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->device, NULL, NULL, &device_param->context);
+    CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
 
     if (CL_rc == -1) return -1;
 
@@ -5627,9 +5703,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      */
 
     // not supported with NV
-    // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->device, NULL);
+    // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
 
-    CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->context, device_param->device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
+    CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
 
     if (CL_rc == -1) return -1;
 
@@ -5741,9 +5817,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // we don't have sm_* on vendors not NV but it doesn't matter
 
     #if defined (DEBUG)
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #else
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
     #endif
 
     build_options_buf[build_options_len] = 0;
@@ -5751,7 +5827,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     /*
     if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
     {
-      if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK)
+      if (device_param->opencl_platform_vendor_id == VENDOR_ID_INTEL_SDK)
       {
         strncat (build_options_buf, " -cl-opt-disable", 16);
       }
@@ -5777,8 +5853,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     build_options_module_buf[build_options_module_len] = 0;
 
     #if defined (DEBUG)
-    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf);
-    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf);
+    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", backend_devices_idx + 1, build_options_buf);
+    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", backend_devices_idx + 1, build_options_module_buf);
     #endif
 
     /**
@@ -5790,19 +5866,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s-%d-%u",
       backend_ctx->comptime,
-      device_param->platform_vendor_id,
+      device_param->opencl_platform_vendor_id,
       device_param->device_name,
-      device_param->device_version,
-      device_param->driver_version,
+      device_param->opencl_device_version,
+      device_param->opencl_driver_version,
       device_param->vector_width,
       hashconfig->kern_type);
 
     const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s",
       backend_ctx->comptime,
-      device_param->platform_vendor_id,
+      device_param->opencl_platform_vendor_id,
       device_param->device_name,
-      device_param->device_version,
-      device_param->driver_version);
+      device_param->opencl_device_version,
+      device_param->opencl_driver_version);
 
     md5_ctx_t md5_ctx;
 
@@ -5827,7 +5903,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // Seems to be completely broken on Apple + (Intel?) CPU
     // To reproduce set cache_disable to false and run benchmark -b
 
-    if (device_param->platform_vendor_id == VENDOR_ID_APPLE)
+    if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
     {
       if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
       {
@@ -5900,7 +5976,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (cached == false)
       {
         #if defined (DEBUG)
-        if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
+        if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
         #endif
 
         const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -5953,7 +6029,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
 
             continue;
           }
@@ -5993,13 +6069,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->device, build_options_module_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6011,7 +6087,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build == -1) return -1;
 
@@ -6024,7 +6100,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
 
             continue;
           }
@@ -6057,11 +6133,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
+        CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->device, build_options_module_buf, NULL, NULL);
+        CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -6137,7 +6213,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (cached == false)
         {
           #if defined (DEBUG)
-          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
+          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
           #endif
 
           const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -6148,13 +6224,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6166,7 +6242,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build == -1) return -1;
 
@@ -6179,7 +6255,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
 
             continue;
           }
@@ -6209,11 +6285,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_mp);
+          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_mp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -6292,7 +6368,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (cached == false)
         {
           #if defined (DEBUG)
-          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
+          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
           #endif
 
           const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -6303,13 +6379,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6321,7 +6397,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build_info = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build_info = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build_info == -1) return -1;
 
@@ -6334,7 +6410,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
 
             continue;
           }
@@ -6364,11 +6440,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_amp);
+          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_amp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -7576,7 +7652,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       #endif
 
       // we assume all devices have the same specs here, which is wrong, it's a start
-      if ((size_total_host * backend_ctx->devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
+      if ((size_total_host * backend_ctx->backend_devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
 
       if (memory_limit_hit == 1)
       {
@@ -7590,7 +7666,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (kernel_accel_max < kernel_accel_min)
     {
-      event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this attack.", device_id + 1);
+      event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this attack.", backend_devices_idx + 1);
 
       return -1;
     }
@@ -7732,7 +7808,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     hardware_power_all += device_param->hardware_power;
 
-    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_POST, &device_id, sizeof (u32));
+    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
   }
 
   if (user_options->benchmark == false)
@@ -7751,9 +7827,9 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -7769,71 +7845,74 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (device_param->brain_link_out_buf);
     #endif
 
-    if (device_param->d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_buf);
-    if (device_param->d_pws_amp_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_amp_buf);
-    if (device_param->d_pws_comp_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_comp_buf);
-    if (device_param->d_pws_idx)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_idx);
-    if (device_param->d_rules)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules);
-    if (device_param->d_rules_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules_c);
-    if (device_param->d_combs)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs);
-    if (device_param->d_combs_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs_c);
-    if (device_param->d_bfs)            hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs);
-    if (device_param->d_bfs_c)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs_c);
-    if (device_param->d_bitmap_s1_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_a);
-    if (device_param->d_bitmap_s1_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_b);
-    if (device_param->d_bitmap_s1_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_c);
-    if (device_param->d_bitmap_s1_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_d);
-    if (device_param->d_bitmap_s2_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_a);
-    if (device_param->d_bitmap_s2_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_b);
-    if (device_param->d_bitmap_s2_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_c);
-    if (device_param->d_bitmap_s2_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_d);
-    if (device_param->d_plain_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_plain_bufs);
-    if (device_param->d_digests_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_buf);
-    if (device_param->d_digests_shown)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_shown);
-    if (device_param->d_salt_bufs)      hc_clReleaseMemObject (hashcat_ctx, device_param->d_salt_bufs);
-    if (device_param->d_esalt_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_esalt_bufs);
-    if (device_param->d_tmps)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tmps);
-    if (device_param->d_hooks)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_hooks);
-    if (device_param->d_result)         hc_clReleaseMemObject (hashcat_ctx, device_param->d_result);
-    if (device_param->d_extra0_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra0_buf);
-    if (device_param->d_extra1_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra1_buf);
-    if (device_param->d_extra2_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra2_buf);
-    if (device_param->d_extra3_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra3_buf);
-    if (device_param->d_root_css_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_root_css_buf);
-    if (device_param->d_markov_css_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_markov_css_buf);
-    if (device_param->d_tm_c)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tm_c);
-    if (device_param->d_st_digests_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_digests_buf);
-    if (device_param->d_st_salts_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_salts_buf);
-    if (device_param->d_st_esalts_buf)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_esalts_buf);
+    if (device_param->is_opencl == true)
+    {
+      if (device_param->d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_buf);
+      if (device_param->d_pws_amp_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_amp_buf);
+      if (device_param->d_pws_comp_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_comp_buf);
+      if (device_param->d_pws_idx)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_idx);
+      if (device_param->d_rules)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules);
+      if (device_param->d_rules_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules_c);
+      if (device_param->d_combs)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs);
+      if (device_param->d_combs_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs_c);
+      if (device_param->d_bfs)            hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs);
+      if (device_param->d_bfs_c)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs_c);
+      if (device_param->d_bitmap_s1_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_a);
+      if (device_param->d_bitmap_s1_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_b);
+      if (device_param->d_bitmap_s1_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_c);
+      if (device_param->d_bitmap_s1_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_d);
+      if (device_param->d_bitmap_s2_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_a);
+      if (device_param->d_bitmap_s2_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_b);
+      if (device_param->d_bitmap_s2_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_c);
+      if (device_param->d_bitmap_s2_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_d);
+      if (device_param->d_plain_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_plain_bufs);
+      if (device_param->d_digests_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_buf);
+      if (device_param->d_digests_shown)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_shown);
+      if (device_param->d_salt_bufs)      hc_clReleaseMemObject (hashcat_ctx, device_param->d_salt_bufs);
+      if (device_param->d_esalt_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_esalt_bufs);
+      if (device_param->d_tmps)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tmps);
+      if (device_param->d_hooks)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_hooks);
+      if (device_param->d_result)         hc_clReleaseMemObject (hashcat_ctx, device_param->d_result);
+      if (device_param->d_extra0_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra0_buf);
+      if (device_param->d_extra1_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra1_buf);
+      if (device_param->d_extra2_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra2_buf);
+      if (device_param->d_extra3_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra3_buf);
+      if (device_param->d_root_css_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_root_css_buf);
+      if (device_param->d_markov_css_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_markov_css_buf);
+      if (device_param->d_tm_c)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tm_c);
+      if (device_param->d_st_digests_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_digests_buf);
+      if (device_param->d_st_salts_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_salts_buf);
+      if (device_param->d_st_esalts_buf)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_esalts_buf);
 
-    if (device_param->kernel1)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel1);
-    if (device_param->kernel12)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel12);
-    if (device_param->kernel2)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel2);
-    if (device_param->kernel23)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel23);
-    if (device_param->kernel3)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel3);
-    if (device_param->kernel4)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel4);
-    if (device_param->kernel_init2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_init2);
-    if (device_param->kernel_loop2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_loop2);
-    if (device_param->kernel_mp)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp);
-    if (device_param->kernel_mp_l)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_l);
-    if (device_param->kernel_mp_r)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_r);
-    if (device_param->kernel_tm)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_tm);
-    if (device_param->kernel_amp)       hc_clReleaseKernel (hashcat_ctx, device_param->kernel_amp);
-    if (device_param->kernel_memset)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_memset);
-    if (device_param->kernel_atinit)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_atinit);
-    if (device_param->kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->kernel_decompress);
-    if (device_param->kernel_aux1)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux1);
-    if (device_param->kernel_aux2)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux2);
-    if (device_param->kernel_aux3)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux3);
-    if (device_param->kernel_aux4)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux4);
+      if (device_param->kernel1)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel1);
+      if (device_param->kernel12)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel12);
+      if (device_param->kernel2)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel2);
+      if (device_param->kernel23)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel23);
+      if (device_param->kernel3)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel3);
+      if (device_param->kernel4)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel4);
+      if (device_param->kernel_init2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_init2);
+      if (device_param->kernel_loop2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_loop2);
+      if (device_param->kernel_mp)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp);
+      if (device_param->kernel_mp_l)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_l);
+      if (device_param->kernel_mp_r)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_r);
+      if (device_param->kernel_tm)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_tm);
+      if (device_param->kernel_amp)       hc_clReleaseKernel (hashcat_ctx, device_param->kernel_amp);
+      if (device_param->kernel_memset)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_memset);
+      if (device_param->kernel_atinit)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_atinit);
+      if (device_param->kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->kernel_decompress);
+      if (device_param->kernel_aux1)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux1);
+      if (device_param->kernel_aux2)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux2);
+      if (device_param->kernel_aux3)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux3);
+      if (device_param->kernel_aux4)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux4);
 
-    if (device_param->program)          hc_clReleaseProgram (hashcat_ctx, device_param->program);
-    if (device_param->program_mp)       hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
-    if (device_param->program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->program_amp);
+      if (device_param->program)          hc_clReleaseProgram (hashcat_ctx, device_param->program);
+      if (device_param->program_mp)       hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
+      if (device_param->program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->program_amp);
 
-    if (device_param->command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->command_queue);
+      if (device_param->command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->command_queue);
 
-    if (device_param->context)          hc_clReleaseContext (hashcat_ctx, device_param->context);
+      if (device_param->context)          hc_clReleaseContext (hashcat_ctx, device_param->context);
+    }
 
     device_param->pws_comp            = NULL;
     device_param->pws_idx             = NULL;
@@ -7917,9 +7996,9 @@ void backend_session_reset (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -7965,14 +8044,14 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
 {
   combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx;
   hashconfig_t     *hashconfig     = hashcat_ctx->hashconfig;
-  backend_ctx_t     *backend_ctx     = hashcat_ctx->backend_ctx;
+  backend_ctx_t    *backend_ctx    = hashcat_ctx->backend_ctx;
   user_options_t   *user_options   = hashcat_ctx->user_options;
 
   if (backend_ctx->enabled == false) return 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -7983,17 +8062,20 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
     device_param->kernel_params_buf32[33] = combinator_ctx->combs_mode;
 
     /*
-    int CL_rc;
+    if (device_param->is_opencl == true)
+    {
+      int CL_rc;
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel1, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel3, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel4, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel1, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel3, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel4, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
 
-    if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel12,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-    if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel23,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-    if (hashconfig->opts_type & OPTS_TYPE_INIT2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_init2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-    if (hashconfig->opts_type & OPTS_TYPE_LOOP2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_loop2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel12,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel23,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_INIT2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_init2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_LOOP2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_loop2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+    }
     */
 
     // kernel_params_amp
@@ -8007,11 +8089,14 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
 
       if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
       {
-        int CL_rc;
+        if (device_param->is_opencl == true)
+        {
+          int CL_rc;
 
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
 
-        if (CL_rc == -1) return -1;
+          if (CL_rc == -1) return -1;
+        }
       }
     }
   }
@@ -8022,16 +8107,16 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
 int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
 {
   mask_ctx_t     *mask_ctx     = hashcat_ctx->mask_ctx;
-  backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   user_options_t *user_options = hashcat_ctx->user_options;
 
   if (backend_ctx->enabled == false) return 0;
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -8040,13 +8125,16 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
     device_param->kernel_params_mp_buf64[3] = 0;
     device_param->kernel_params_mp_buf32[4] = mask_ctx->css_cnt;
 
-    int CL_rc = CL_SUCCESS;
+    if (device_param->is_opencl == true)
+    {
+      int CL_rc = CL_SUCCESS;
 
-    for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
-    for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_uint), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    }
   }
 
   return 0;
@@ -8062,9 +8150,9 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
 
   if (user_options->slow_candidates == true) return 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -8077,18 +8165,21 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
     device_param->kernel_params_mp_r_buf64[3] = 0;
     device_param->kernel_params_mp_r_buf32[4] = css_cnt_r;
 
-    int CL_rc = CL_SUCCESS;
+    if (device_param->is_opencl == true)
+    {
+      int CL_rc = CL_SUCCESS;
 
-    for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-    for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-    for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
 
-    for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
-    for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
-    for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    }
   }
 
   return 0;
diff --git a/src/dispatch.c b/src/dispatch.c
index 466b16604..44cdb59ce 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -27,9 +27,9 @@ static u64 get_highest_words_done (const hashcat_ctx_t *hashcat_ctx)
 
   u64 words_cur = 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
@@ -49,9 +49,9 @@ static u64 get_lowest_words_done (const hashcat_ctx_t *hashcat_ctx)
 
   u64 words_cur = 0xffffffffffffffff;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
diff --git a/src/hashcat.c b/src/hashcat.c
index 68fe33d73..11d212243 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -180,9 +180,9 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
    * prepare thread buffers
    */
 
-  thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
+  thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (thread_param_t));
 
-  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
+  hc_thread_t *c_threads = (hc_thread_t *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (hc_thread_t));
 
   /**
    * create autotune threads
@@ -192,17 +192,17 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->devices_status = STATUS_AUTOTUNE;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    thread_param_t *thread_param = threads_param + device_id;
+    thread_param_t *thread_param = threads_param + backend_devices_idx;
 
     thread_param->hashcat_ctx = hashcat_ctx;
-    thread_param->tid         = device_id;
+    thread_param->tid         = backend_devices_idx;
 
-    hc_thread_create (c_threads[device_id], thread_autotune, thread_param);
+    hc_thread_create (c_threads[backend_devices_idx], thread_autotune, thread_param);
   }
 
-  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->backend_devices_cnt, c_threads);
 
   EVENT (EVENT_AUTOTUNE_FINISHED);
 
@@ -249,24 +249,24 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
 
   status_ctx->accessible = true;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    thread_param_t *thread_param = threads_param + device_id;
+    thread_param_t *thread_param = threads_param + backend_devices_idx;
 
     thread_param->hashcat_ctx = hashcat_ctx;
-    thread_param->tid         = device_id;
+    thread_param->tid         = backend_devices_idx;
 
     if (user_options_extra->wordlist_mode == WL_MODE_STDIN)
     {
-      hc_thread_create (c_threads[device_id], thread_calc_stdin, thread_param);
+      hc_thread_create (c_threads[backend_devices_idx], thread_calc_stdin, thread_param);
     }
     else
     {
-      hc_thread_create (c_threads[device_id], thread_calc, thread_param);
+      hc_thread_create (c_threads[backend_devices_idx], thread_calc, thread_param);
     }
   }
 
-  hc_thread_wait (backend_ctx->devices_cnt, c_threads);
+  hc_thread_wait (backend_ctx->backend_devices_cnt, c_threads);
 
   hcfree (c_threads);
 
@@ -736,23 +736,23 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
   {
     EVENT (EVENT_SELFTEST_STARTING);
 
-    thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->devices_cnt, sizeof (thread_param_t));
+    thread_param_t *threads_param = (thread_param_t *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (thread_param_t));
 
-    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (backend_ctx->devices_cnt, sizeof (hc_thread_t));
+    hc_thread_t *selftest_threads = (hc_thread_t *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (hc_thread_t));
 
     status_ctx->devices_status = STATUS_SELFTEST;
 
-    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+    for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
     {
-      thread_param_t *thread_param = threads_param + device_id;
+      thread_param_t *thread_param = threads_param + backend_devices_idx;
 
       thread_param->hashcat_ctx = hashcat_ctx;
-      thread_param->tid         = device_id;
+      thread_param->tid         = backend_devices_idx;
 
-      hc_thread_create (selftest_threads[device_id], thread_selftest, thread_param);
+      hc_thread_create (selftest_threads[backend_devices_idx], thread_selftest, thread_param);
     }
 
-    hc_thread_wait (backend_ctx->devices_cnt, selftest_threads);
+    hc_thread_wait (backend_ctx->backend_devices_cnt, selftest_threads);
 
     hcfree (threads_param);
 
@@ -760,11 +760,11 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
     // check for any selftest failures
 
-    for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+    for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
     {
       if (backend_ctx->enabled == false) continue;
 
-      hc_device_param_t *device_param = backend_ctx->devices_param + device_id;
+      hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
 
       if (device_param->skipped == true) continue;
 
diff --git a/src/hwmon.c b/src/hwmon.c
index 4c8dca868..7d710a44c 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -45,11 +45,11 @@ static void sysfs_close (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-static char *hm_SYSFS_get_syspath_device (hashcat_ctx_t *hashcat_ctx, const int device_id)
+static char *hm_SYSFS_get_syspath_device (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_device_idx];
 
   char *syspath;
 
@@ -58,9 +58,9 @@ static char *hm_SYSFS_get_syspath_device (hashcat_ctx_t *hashcat_ctx, const int
   return syspath;
 }
 
-static char *hm_SYSFS_get_syspath_hwmon (hashcat_ctx_t *hashcat_ctx, const int device_id)
+static char *hm_SYSFS_get_syspath_hwmon (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
-  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL)
   {
@@ -96,9 +96,9 @@ static char *hm_SYSFS_get_syspath_hwmon (hashcat_ctx_t *hashcat_ctx, const int d
   return hwmon;
 }
 
-static int hm_SYSFS_get_fan_speed_current (hashcat_ctx_t *hashcat_ctx, const int device_id, int *val)
+static int hm_SYSFS_get_fan_speed_current (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx, int *val)
 {
-  char *syspath = hm_SYSFS_get_syspath_hwmon (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_hwmon (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL) return -1;
 
@@ -188,9 +188,9 @@ static int hm_SYSFS_get_fan_speed_current (hashcat_ctx_t *hashcat_ctx, const int
   return 0;
 }
 
-static int hm_SYSFS_get_temperature_current (hashcat_ctx_t *hashcat_ctx, const int device_id, int *val)
+static int hm_SYSFS_get_temperature_current (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx, int *val)
 {
-  char *syspath = hm_SYSFS_get_syspath_hwmon (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_hwmon (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL) return -1;
 
@@ -233,9 +233,9 @@ static int hm_SYSFS_get_temperature_current (hashcat_ctx_t *hashcat_ctx, const i
   return 0;
 }
 
-static int hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx_t *hashcat_ctx, const int device_id, int *val)
+static int hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx, int *val)
 {
-  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL) return -1;
 
@@ -288,9 +288,9 @@ static int hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx_t *hashcat_ctx, const int devic
   return 0;
 }
 
-static int hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx_t *hashcat_ctx, const int device_id, int *val)
+static int hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx, int *val)
 {
-  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL) return -1;
 
@@ -343,9 +343,9 @@ static int hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx_t *hashcat_ctx, const int devic
   return 0;
 }
 
-static int hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx_t *hashcat_ctx, const int device_id, int *val)
+static int hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx, int *val)
 {
-  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, device_id);
+  char *syspath = hm_SYSFS_get_syspath_device (hashcat_ctx, backend_device_idx);
 
   if (syspath == NULL) return -1;
 
@@ -1342,33 +1342,33 @@ static int hm_get_adapter_index_nvml (hashcat_ctx_t *hashcat_ctx, HM_ADAPTER_NVM
   return (deviceCount);
 }
 
-int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
-      if (hwmon_ctx->hm_device[device_id].od_version == 5)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
       {
 
       }
-      else if (hwmon_ctx->hm_device[device_id].od_version == 6)
+      else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
       {
         int CurrentValue = 0;
         int DefaultValue = 0;
 
-        if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &CurrentValue, &DefaultValue) == -1)
+        if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &CurrentValue, &DefaultValue) == -1)
         {
-          hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
 
           return -1;
         }
@@ -1380,15 +1380,15 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       int target = 0;
 
-      if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN, (unsigned int *) &target) == -1)
+      if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN, (unsigned int *) &target) == -1)
       {
-        hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
 
         return -1;
       }
@@ -1397,46 +1397,46 @@ int hm_get_threshold_slowdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
 
   return -1;
 }
 
-int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
-      if (hwmon_ctx->hm_device[device_id].od_version == 5)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
       {
 
       }
-      else if (hwmon_ctx->hm_device[device_id].od_version == 6)
+      else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
       {
 
       }
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       int target = 0;
 
-      if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, (unsigned int *) &target) == -1)
+      if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, (unsigned int *) &target) == -1)
       {
-        hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported = false;
 
         return -1;
       }
@@ -1445,35 +1445,35 @@ int hm_get_threshold_shutdown_with_device_id (hashcat_ctx_t *hashcat_ctx, const
     }
   }
 
-  hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported = false;
 
   return -1;
 }
 
-int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].temperature_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
-      if (hwmon_ctx->hm_device[device_id].od_version == 5)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
       {
         ADLTemperature Temperature;
 
         Temperature.iSize = sizeof (ADLTemperature);
 
-        if (hm_ADL_Overdrive5_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, 0, &Temperature) == -1)
+        if (hm_ADL_Overdrive5_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &Temperature) == -1)
         {
-          hwmon_ctx->hm_device[device_id].temperature_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
           return -1;
         }
@@ -1481,13 +1481,13 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
         return Temperature.iTemperature / 1000;
       }
 
-      if (hwmon_ctx->hm_device[device_id].od_version == 6)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
       {
         int Temperature = 0;
 
-        if (hm_ADL_Overdrive6_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &Temperature) == -1)
+        if (hm_ADL_Overdrive6_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &Temperature) == -1)
         {
-          hwmon_ctx->hm_device[device_id].temperature_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
           return -1;
         }
@@ -1500,9 +1500,9 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     {
       int temperature = 0;
 
-      if (hm_SYSFS_get_temperature_current (hashcat_ctx, device_id, &temperature) == -1)
+      if (hm_SYSFS_get_temperature_current (hashcat_ctx, backend_device_idx, &temperature) == -1)
       {
-        hwmon_ctx->hm_device[device_id].temperature_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
         return -1;
       }
@@ -1511,15 +1511,15 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       int temperature = 0;
 
-      if (hm_NVML_nvmlDeviceGetTemperature (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, NVML_TEMPERATURE_GPU, (u32 *) &temperature) == -1)
+      if (hm_NVML_nvmlDeviceGetTemperature (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_GPU, (u32 *) &temperature) == -1)
       {
-        hwmon_ctx->hm_device[device_id].temperature_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
         return -1;
       }
@@ -1528,27 +1528,27 @@ int hm_get_temperature_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  hwmon_ctx->hm_device[device_id].temperature_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
   return -1;
 }
 
-int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].fanpolicy_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
-      if (hwmon_ctx->hm_device[device_id].od_version == 5)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
       {
         ADLFanSpeedValue lpFanSpeedValue;
 
@@ -1557,10 +1557,10 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
         lpFanSpeedValue.iSize      = sizeof (lpFanSpeedValue);
         lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
 
-        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, 0, &lpFanSpeedValue) == -1)
+        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
         {
-          hwmon_ctx->hm_device[device_id].fanpolicy_get_supported = false;
-          hwmon_ctx->hm_device[device_id].fanspeed_get_supported  = false;
+          hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
 
           return -1;
         }
@@ -1568,7 +1568,7 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
         return (lpFanSpeedValue.iFanSpeed & ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED) ? 0 : 1;
       }
 
-      if (hwmon_ctx->hm_device[device_id].od_version == 6)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
       {
         return 1;
       }
@@ -1580,33 +1580,33 @@ int hm_get_fanpolicy_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     return 1;
   }
 
-  hwmon_ctx->hm_device[device_id].fanpolicy_get_supported = false;
-  hwmon_ctx->hm_device[device_id].fanspeed_get_supported  = false;
+  hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
 
   return -1;
 }
 
-int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].fanspeed_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
-      if (hwmon_ctx->hm_device[device_id].od_version == 5)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
       {
         ADLFanSpeedValue lpFanSpeedValue;
 
@@ -1616,9 +1616,9 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
         lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
         lpFanSpeedValue.iFlags     = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED;
 
-        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, 0, &lpFanSpeedValue) == -1)
+        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
         {
-          hwmon_ctx->hm_device[device_id].fanspeed_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
           return -1;
         }
@@ -1626,15 +1626,15 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
         return lpFanSpeedValue.iFanSpeed;
       }
 
-      if (hwmon_ctx->hm_device[device_id].od_version == 6)
+      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
       {
         ADLOD6FanSpeedInfo faninfo;
 
         memset (&faninfo, 0, sizeof (faninfo));
 
-        if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &faninfo) == -1)
+        if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &faninfo) == -1)
         {
-          hwmon_ctx->hm_device[device_id].fanspeed_get_supported = false;
+          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
           return -1;
         }
@@ -1647,9 +1647,9 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     {
       int speed = 0;
 
-      if (hm_SYSFS_get_fan_speed_current (hashcat_ctx, device_id, &speed) == -1)
+      if (hm_SYSFS_get_fan_speed_current (hashcat_ctx, backend_device_idx, &speed) == -1)
       {
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
         return -1;
       }
@@ -1658,15 +1658,15 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       int speed = 0;
 
-      if (hm_NVML_nvmlDeviceGetFanSpeed (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, (u32 *) &speed) == -1)
+      if (hm_NVML_nvmlDeviceGetFanSpeed (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, (u32 *) &speed) == -1)
       {
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
         return -1;
       }
@@ -1675,23 +1675,23 @@ int hm_get_fanspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  hwmon_ctx->hm_device[device_id].fanspeed_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
   return -1;
 }
 
-int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].buslanes_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1699,9 +1699,9 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
 
       PMActivity.iSize = sizeof (ADLPMActivity);
 
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &PMActivity) == -1)
+      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
         return -1;
       }
@@ -1713,9 +1713,9 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     {
       int lanes;
 
-      if (hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx, device_id, &lanes) == -1)
+      if (hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx, backend_device_idx, &lanes) == -1)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
         return -1;
       }
@@ -1724,15 +1724,15 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       unsigned int currLinkWidth;
 
-      if (hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, &currLinkWidth) == -1)
+      if (hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &currLinkWidth) == -1)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
         return -1;
       }
@@ -1741,23 +1741,23 @@ int hm_get_buslanes_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
     }
   }
 
-  hwmon_ctx->hm_device[device_id].buslanes_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
   return -1;
 }
 
-int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].utilization_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1765,9 +1765,9 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
       PMActivity.iSize = sizeof (ADLPMActivity);
 
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &PMActivity) == -1)
+      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
       {
-        hwmon_ctx->hm_device[device_id].utilization_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
 
         return -1;
       }
@@ -1776,15 +1776,15 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       nvmlUtilization_t utilization;
 
-      if (hm_NVML_nvmlDeviceGetUtilizationRates (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, &utilization) == -1)
+      if (hm_NVML_nvmlDeviceGetUtilizationRates (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &utilization) == -1)
       {
-        hwmon_ctx->hm_device[device_id].utilization_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
 
         return -1;
       }
@@ -1793,23 +1793,23 @@ int hm_get_utilization_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  hwmon_ctx->hm_device[device_id].utilization_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
 
   return -1;
 }
 
-int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].memoryspeed_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1817,9 +1817,9 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
 
       PMActivity.iSize = sizeof (ADLPMActivity);
 
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &PMActivity) == -1)
+      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
       {
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
         return -1;
       }
@@ -1831,9 +1831,9 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     {
       int clockfreq;
 
-      if (hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx, device_id, &clockfreq) == -1)
+      if (hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
       {
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
         return -1;
       }
@@ -1842,15 +1842,15 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       unsigned int clockfreq;
 
-      if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, NVML_CLOCK_MEM, &clockfreq) == -1)
+      if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_CLOCK_MEM, &clockfreq) == -1)
       {
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
         return -1;
       }
@@ -1859,23 +1859,23 @@ int hm_get_memoryspeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 dev
     }
   }
 
-  hwmon_ctx->hm_device[device_id].memoryspeed_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
   return -1;
 }
 
-int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].corespeed_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     if (hwmon_ctx->hm_adl)
     {
@@ -1883,9 +1883,9 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
 
       PMActivity.iSize = sizeof (ADLPMActivity);
 
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[device_id].adl, &PMActivity) == -1)
+      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
       {
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
         return -1;
       }
@@ -1897,9 +1897,9 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     {
       int clockfreq;
 
-      if (hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx, device_id, &clockfreq) == -1)
+      if (hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
       {
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
         return -1;
       }
@@ -1908,15 +1908,15 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
       unsigned int clockfreq;
 
-      if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, NVML_CLOCK_SM, &clockfreq) == -1)
+      if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_CLOCK_SM, &clockfreq) == -1)
       {
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported = false;
+        hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
         return -1;
       }
@@ -1925,27 +1925,27 @@ int hm_get_corespeed_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 devic
     }
   }
 
-  hwmon_ctx->hm_device[device_id].corespeed_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
   return -1;
 }
 
-int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device_id)
+int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int backend_device_idx)
 {
   hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (hwmon_ctx->enabled == false) return -1;
 
-  if (hwmon_ctx->hm_device[device_id].throttle_get_supported == false) return -1;
+  if (hwmon_ctx->hm_device[backend_device_idx].throttle_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
   {
   }
 
-  if (backend_ctx->devices_param[device_id].device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1953,8 +1953,8 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
       unsigned long long clocksThrottleReasons = 0;
       unsigned long long supportedThrottleReasons = 0;
 
-      if (hm_NVML_nvmlDeviceGetCurrentClocksThrottleReasons   (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, &clocksThrottleReasons)    == -1) return -1;
-      if (hm_NVML_nvmlDeviceGetSupportedClocksThrottleReasons (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvml, &supportedThrottleReasons) == -1) return -1;
+      if (hm_NVML_nvmlDeviceGetCurrentClocksThrottleReasons   (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &clocksThrottleReasons)    == -1) return -1;
+      if (hm_NVML_nvmlDeviceGetSupportedClocksThrottleReasons (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &supportedThrottleReasons) == -1) return -1;
 
       clocksThrottleReasons &=  supportedThrottleReasons;
       clocksThrottleReasons &= ~nvmlClocksThrottleReasonGpuIdle;
@@ -1981,17 +1981,17 @@ int hm_get_throttle_with_device_id (hashcat_ctx_t *hashcat_ctx, const u32 device
       perfPolicies_info.version   = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1, 1);
       perfPolicies_status.version = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1, 1);
 
-      hm_NvAPI_GPU_GetPerfPoliciesInfo (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvapi, &perfPolicies_info);
+      hm_NvAPI_GPU_GetPerfPoliciesInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvapi, &perfPolicies_info);
 
       perfPolicies_status.info_value = perfPolicies_info.info_value;
 
-      hm_NvAPI_GPU_GetPerfPoliciesStatus (hashcat_ctx, hwmon_ctx->hm_device[device_id].nvapi, &perfPolicies_status);
+      hm_NvAPI_GPU_GetPerfPoliciesStatus (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvapi, &perfPolicies_status);
 
       return perfPolicies_status.throttle & 2;
     }
   }
 
-  hwmon_ctx->hm_device[device_id].throttle_get_supported = false;
+  hwmon_ctx->hm_device[backend_device_idx].throttle_get_supported = false;
 
   return -1;
 }
@@ -2102,15 +2102,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvml (hashcat_ctx, nvmlGPUHandle);
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
         if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        if (device_param->device_vendor_id != VENDOR_ID_NV) continue;
+        if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
 
         for (int i = 0; i < tmp_in; i++)
         {
@@ -2124,7 +2124,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
            && (device_param->pcie_device   == (pci.device >> 3))
            && (device_param->pcie_function == (pci.device & 7)))
           {
-            const u32 platform_devices_id = device_param->platform_devices_id;
+            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
 
             hm_adapters_nvml[platform_devices_id].nvml = nvmlGPUHandle[i];
 
@@ -2152,15 +2152,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in = hm_get_adapter_index_nvapi (hashcat_ctx, nvGPUHandle);
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
         if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        if (device_param->device_vendor_id != VENDOR_ID_NV) continue;
+        if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
 
         for (int i = 0; i < tmp_in; i++)
         {
@@ -2179,7 +2179,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
            && (device_param->pcie_device   == (BusSlotId >> 3))
            && (device_param->pcie_function == (BusSlotId & 7)))
           {
-            const u32 platform_devices_id = device_param->platform_devices_id;
+            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
 
             hm_adapters_nvapi[platform_devices_id].nvapi = nvGPUHandle[i];
 
@@ -2221,15 +2221,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
         return -1;
       }
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
         if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        if (device_param->device_vendor_id != VENDOR_ID_AMD) continue;
+        if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue;
 
         for (int i = 0; i < tmp_in; i++)
         {
@@ -2237,7 +2237,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
            && (device_param->pcie_device   == (lpAdapterInfo[i].iDeviceNumber >> 3))
            && (device_param->pcie_function == (lpAdapterInfo[i].iDeviceNumber & 7)))
           {
-            const u32 platform_devices_id = device_param->platform_devices_id;
+            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
 
             int od_supported = 0;
             int od_enabled   = 0;
@@ -2271,13 +2271,13 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     {
       int hm_adapters_id = 0;
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        hm_adapters_sysfs[hm_adapters_id].sysfs = device_id;
+        hm_adapters_sysfs[hm_adapters_id].sysfs = backend_devices_idx; // TODO: confirm the sysfs handle should be the backend device index
 
         hm_adapters_sysfs[hm_adapters_id].buslanes_get_supported    = true;
         hm_adapters_sysfs[hm_adapters_id].corespeed_get_supported   = true;
@@ -2308,111 +2308,111 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
    * save buffer required for later restores
    */
 
-  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->devices_cnt, sizeof (ADLOD6MemClockState));
+  hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (ADLOD6MemClockState));
 
   /**
    * HM devices: copy
    */
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     if (device_param->skipped == true) continue;
 
     if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-    const u32 platform_devices_id = device_param->platform_devices_id;
+    const u32 platform_devices_id = device_param->opencl_platform_devices_id;
 
-    if (device_param->device_vendor_id == VENDOR_ID_AMD)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
     {
-      hwmon_ctx->hm_device[device_id].adl         = hm_adapters_adl[platform_devices_id].adl;
-      hwmon_ctx->hm_device[device_id].sysfs       = hm_adapters_sysfs[platform_devices_id].sysfs;
-      hwmon_ctx->hm_device[device_id].nvapi       = 0;
-      hwmon_ctx->hm_device[device_id].nvml        = 0;
-      hwmon_ctx->hm_device[device_id].od_version  = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].adl         = hm_adapters_adl[platform_devices_id].adl;
+      hwmon_ctx->hm_device[backend_devices_idx].sysfs       = hm_adapters_sysfs[platform_devices_id].sysfs;
+      hwmon_ctx->hm_device[backend_devices_idx].nvapi       = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].nvml        = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
 
       if (hwmon_ctx->hm_adl)
       {
-        hwmon_ctx->hm_device[device_id].od_version = hm_adapters_adl[platform_devices_id].od_version;
+        hwmon_ctx->hm_device[backend_devices_idx].od_version = hm_adapters_adl[platform_devices_id].od_version;
 
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported            |= hm_adapters_adl[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported           |= hm_adapters_adl[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported            |= hm_adapters_adl[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanpolicy_get_supported           |= hm_adapters_adl[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported         |= hm_adapters_adl[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].temperature_get_supported         |= hm_adapters_adl[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[device_id].throttle_get_supported            |= hm_adapters_adl[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[device_id].utilization_get_supported         |= hm_adapters_adl[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_adl[platform_devices_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_adl[platform_devices_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_adl[platform_devices_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_adl[platform_devices_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_adl[platform_devices_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_adl[platform_devices_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_adl[platform_devices_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_adl[platform_devices_id].utilization_get_supported;
       }
 
       if (hwmon_ctx->hm_sysfs)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported            |= hm_adapters_sysfs[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported           |= hm_adapters_sysfs[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported            |= hm_adapters_sysfs[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanpolicy_get_supported           |= hm_adapters_sysfs[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported         |= hm_adapters_sysfs[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].temperature_get_supported         |= hm_adapters_sysfs[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[device_id].throttle_get_supported            |= hm_adapters_sysfs[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[device_id].utilization_get_supported         |= hm_adapters_sysfs[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_sysfs[platform_devices_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_sysfs[platform_devices_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_sysfs[platform_devices_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_sysfs[platform_devices_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_sysfs[platform_devices_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_sysfs[platform_devices_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_sysfs[platform_devices_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_sysfs[platform_devices_id].utilization_get_supported;
       }
     }
 
-    if (device_param->device_vendor_id == VENDOR_ID_NV)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
     {
-      hwmon_ctx->hm_device[device_id].adl         = 0;
-      hwmon_ctx->hm_device[device_id].sysfs       = 0;
-      hwmon_ctx->hm_device[device_id].nvapi       = hm_adapters_nvapi[platform_devices_id].nvapi;
-      hwmon_ctx->hm_device[device_id].nvml        = hm_adapters_nvml[platform_devices_id].nvml;
-      hwmon_ctx->hm_device[device_id].od_version  = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].adl         = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].sysfs       = 0;
+      hwmon_ctx->hm_device[backend_devices_idx].nvapi       = hm_adapters_nvapi[platform_devices_id].nvapi;
+      hwmon_ctx->hm_device[backend_devices_idx].nvml        = hm_adapters_nvml[platform_devices_id].nvml;
+      hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
 
       if (hwmon_ctx->hm_nvml)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported            |= hm_adapters_nvml[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported           |= hm_adapters_nvml[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported            |= hm_adapters_nvml[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanpolicy_get_supported           |= hm_adapters_nvml[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported         |= hm_adapters_nvml[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].temperature_get_supported         |= hm_adapters_nvml[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[device_id].throttle_get_supported            |= hm_adapters_nvml[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[device_id].utilization_get_supported         |= hm_adapters_nvml[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvml[platform_devices_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvml[platform_devices_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvml[platform_devices_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvml[platform_devices_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvml[platform_devices_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvml[platform_devices_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvml[platform_devices_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvml[platform_devices_id].utilization_get_supported;
       }
 
       if (hwmon_ctx->hm_nvapi)
       {
-        hwmon_ctx->hm_device[device_id].buslanes_get_supported            |= hm_adapters_nvapi[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[device_id].corespeed_get_supported           |= hm_adapters_nvapi[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanspeed_get_supported            |= hm_adapters_nvapi[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].fanpolicy_get_supported           |= hm_adapters_nvapi[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[device_id].memoryspeed_get_supported         |= hm_adapters_nvapi[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[device_id].temperature_get_supported         |= hm_adapters_nvapi[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_shutdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[device_id].threshold_slowdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[device_id].throttle_get_supported            |= hm_adapters_nvapi[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[device_id].utilization_get_supported         |= hm_adapters_nvapi[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvapi[platform_devices_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvapi[platform_devices_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvapi[platform_devices_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvapi[platform_devices_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvapi[platform_devices_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvapi[platform_devices_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvapi[platform_devices_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvapi[platform_devices_id].utilization_get_supported;
       }
     }
 
     // by calling the different functions here this will disable them in case they will error out
     // this will also reduce the error itself printed to the user to a single print on startup
 
-    hm_get_buslanes_with_device_id            (hashcat_ctx, device_id);
-    hm_get_corespeed_with_device_id           (hashcat_ctx, device_id);
-    hm_get_fanpolicy_with_device_id           (hashcat_ctx, device_id);
-    hm_get_fanspeed_with_device_id            (hashcat_ctx, device_id);
-    hm_get_memoryspeed_with_device_id         (hashcat_ctx, device_id);
-    hm_get_temperature_with_device_id         (hashcat_ctx, device_id);
-    hm_get_threshold_shutdown_with_device_id  (hashcat_ctx, device_id);
-    hm_get_threshold_slowdown_with_device_id  (hashcat_ctx, device_id);
-    hm_get_throttle_with_device_id            (hashcat_ctx, device_id);
-    hm_get_utilization_with_device_id         (hashcat_ctx, device_id);
+    hm_get_buslanes_with_devices_idx           (hashcat_ctx, backend_devices_idx);
+    hm_get_corespeed_with_devices_idx          (hashcat_ctx, backend_devices_idx);
+    hm_get_fanpolicy_with_devices_idx          (hashcat_ctx, backend_devices_idx);
+    hm_get_fanspeed_with_devices_idx           (hashcat_ctx, backend_devices_idx);
+    hm_get_memoryspeed_with_devices_idx        (hashcat_ctx, backend_devices_idx);
+    hm_get_temperature_with_devices_idx        (hashcat_ctx, backend_devices_idx);
+    hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx, backend_devices_idx);
+    hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx, backend_devices_idx);
+    hm_get_throttle_with_devices_idx           (hashcat_ctx, backend_devices_idx);
+    hm_get_utilization_with_devices_idx        (hashcat_ctx, backend_devices_idx);
   }
 
   FREE_ADAPTERS;
diff --git a/src/modules/module_01450.c b/src/modules/module_01450.c
index 59d68bb85..59e2dc5c9 100644
--- a/src/modules/module_01450.c
+++ b/src/modules/module_01450.c
@@ -46,12 +46,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_01720.c b/src/modules/module_01720.c
index 0355b04f5..5ed6b8366 100644
--- a/src/modules/module_01720.c
+++ b/src/modules/module_01720.c
@@ -52,12 +52,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_01722.c b/src/modules/module_01722.c
index d944ab14d..7348b6da2 100644
--- a/src/modules/module_01722.c
+++ b/src/modules/module_01722.c
@@ -53,12 +53,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_01740.c b/src/modules/module_01740.c
index cc0f134ee..fd57cde23 100644
--- a/src/modules/module_01740.c
+++ b/src/modules/module_01740.c
@@ -53,12 +53,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_01750.c b/src/modules/module_01750.c
index 81004ce8f..f3a0762ba 100644
--- a/src/modules/module_01750.c
+++ b/src/modules/module_01750.c
@@ -47,12 +47,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_01760.c b/src/modules/module_01760.c
index 2d3a71c93..eac9b387c 100644
--- a/src/modules/module_01760.c
+++ b/src/modules/module_01760.c
@@ -48,12 +48,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_03200.c b/src/modules/module_03200.c
index 8a9cb5b7b..6fd2ecea9 100644
--- a/src/modules/module_03200.c
+++ b/src/modules/module_03200.c
@@ -96,7 +96,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   {
     u32 overhead = 0;
 
-    if (device_param->device_vendor_id == VENDOR_ID_NV)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
     {
       // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
       // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
diff --git a/src/modules/module_06400.c b/src/modules/module_06400.c
index 266ed1d9a..e1c443b8c 100644
--- a/src/modules/module_06400.c
+++ b/src/modules/module_06400.c
@@ -258,12 +258,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c
index 3d4e18349..60a6e3b42 100644
--- a/src/modules/module_06800.c
+++ b/src/modules/module_06800.c
@@ -72,12 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_07500.c b/src/modules/module_07500.c
index 1026921bc..080520f59 100644
--- a/src/modules/module_07500.c
+++ b/src/modules/module_07500.c
@@ -79,7 +79,7 @@ u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_07800.c b/src/modules/module_07800.c
index 8a711709f..e1285ef13 100644
--- a/src/modules/module_07800.c
+++ b/src/modules/module_07800.c
@@ -54,7 +54,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04:  password not found
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_07801.c b/src/modules/module_07801.c
index d49b320fe..997698939 100644
--- a/src/modules/module_07801.c
+++ b/src/modules/module_07801.c
@@ -54,7 +54,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04:  password not found
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c
index 6e1102e56..61b1be24a 100644
--- a/src/modules/module_07900.c
+++ b/src/modules/module_07900.c
@@ -286,10 +286,10 @@ static void drupal7_encode (const u8 digest[64], u8 buf[43])
 
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
-  if (device_param->platform_vendor_id == VENDOR_ID_APPLE)
+  if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
   {
     // trap 6
-    if ((device_param->device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
+    if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
     {
       return true;
     }
diff --git a/src/modules/module_08000.c b/src/modules/module_08000.c
index 1bf634173..a4cfc5ac8 100644
--- a/src/modules/module_08000.c
+++ b/src/modules/module_08000.c
@@ -60,7 +60,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_08600.c b/src/modules/module_08600.c
index 3b73ab178..e28be3e08 100644
--- a/src/modules/module_08600.c
+++ b/src/modules/module_08600.c
@@ -53,7 +53,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c
index 464f47d97..8817fd4b6 100644
--- a/src/modules/module_09000.c
+++ b/src/modules/module_09000.c
@@ -90,7 +90,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       u32 overhead = 0;
 
-      if (device_param->device_vendor_id == VENDOR_ID_NV)
+      if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
       {
         overhead = 4;
       }
@@ -121,7 +121,7 @@ bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // OpenCL 1.2 pocl HSTR: pthread-x86_64-pc-linux-gnu-skylake: Segmentation fault
-  if (device_param->platform_vendor_id == VENDOR_ID_POCL)
+  if (device_param->opencl_platform_vendor_id == VENDOR_ID_POCL)
   {
     return true;
   }
diff --git a/src/modules/module_09200.c b/src/modules/module_09200.c
index c56010732..2e103343a 100644
--- a/src/modules/module_09200.c
+++ b/src/modules/module_09200.c
@@ -88,7 +88,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c
index d9e428808..0a8107648 100644
--- a/src/modules/module_09800.c
+++ b/src/modules/module_09800.c
@@ -88,7 +88,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // OpenCL 1.2 pocl HSTR: pthread-x86_64-pc-linux-gnu-skylake: Segmentation fault
-  if (device_param->platform_vendor_id == VENDOR_ID_POCL)
+  if (device_param->opencl_platform_vendor_id == VENDOR_ID_POCL)
   {
     return true;
   }
diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c
index 3e69fce27..dc5a16da0 100644
--- a/src/modules/module_10700.c
+++ b/src/modules/module_10700.c
@@ -109,13 +109,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // OpenCL 1.2 pocl HSTR: pthread-x86_64-pc-linux-gnu-skylake: Segmentation fault
-  if (device_param->platform_vendor_id == VENDOR_ID_POCL)
+  if (device_param->opencl_platform_vendor_id == VENDOR_ID_POCL)
   {
     return true;
   }
 
   // l_opencl_p_18.1.0.013: password not found
-  if (device_param->device_vendor_id == VENDOR_ID_INTEL_SDK)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
@@ -124,7 +124,7 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   }
 
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 1)
     {
@@ -133,7 +133,7 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   }
 
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed.
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_10800.c b/src/modules/module_10800.c
index 840d81e95..7b653aecb 100644
--- a/src/modules/module_10800.c
+++ b/src/modules/module_10800.c
@@ -52,12 +52,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_10900.c b/src/modules/module_10900.c
index 00087a62c..2c2581c33 100644
--- a/src/modules/module_10900.c
+++ b/src/modules/module_10900.c
@@ -89,12 +89,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_11000.c b/src/modules/module_11000.c
index 3f1e12d04..e02b85a76 100644
--- a/src/modules/module_11000.c
+++ b/src/modules/module_11000.c
@@ -62,7 +62,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_11600.c b/src/modules/module_11600.c
index 75b09e003..39199f477 100644
--- a/src/modules/module_11600.c
+++ b/src/modules/module_11600.c
@@ -292,7 +292,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
@@ -303,7 +303,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_11700.c b/src/modules/module_11700.c
index 38105da7f..02b9af2d6 100644
--- a/src/modules/module_11700.c
+++ b/src/modules/module_11700.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_11750.c b/src/modules/module_11750.c
index c1cefdcde..1956e6c8c 100644
--- a/src/modules/module_11750.c
+++ b/src/modules/module_11750.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_11760.c b/src/modules/module_11760.c
index 9f0bf39e9..0fbcc4eeb 100644
--- a/src/modules/module_11760.c
+++ b/src/modules/module_11760.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_11800.c b/src/modules/module_11800.c
index 30c837067..f43fce332 100644
--- a/src/modules/module_11800.c
+++ b/src/modules/module_11800.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_11850.c b/src/modules/module_11850.c
index a68af6734..c18476582 100644
--- a/src/modules/module_11850.c
+++ b/src/modules/module_11850.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_11860.c b/src/modules/module_11860.c
index e64ec7128..92e2d632d 100644
--- a/src/modules/module_11860.c
+++ b/src/modules/module_11860.c
@@ -44,7 +44,7 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_12100.c b/src/modules/module_12100.c
index c4b90fb63..4881c81c2 100644
--- a/src/modules/module_12100.c
+++ b/src/modules/module_12100.c
@@ -90,7 +90,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_12200.c b/src/modules/module_12200.c
index 979500a15..b7ed28edd 100644
--- a/src/modules/module_12200.c
+++ b/src/modules/module_12200.c
@@ -72,12 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_12500.c b/src/modules/module_12500.c
index 0390de089..bbc30f8dc 100644
--- a/src/modules/module_12500.c
+++ b/src/modules/module_12500.c
@@ -94,7 +94,7 @@ const char *module_benchmark_mask (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_12800.c b/src/modules/module_12800.c
index ef6ae6f0f..4bc490512 100644
--- a/src/modules/module_12800.c
+++ b/src/modules/module_12800.c
@@ -74,12 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_12900.c b/src/modules/module_12900.c
index 6821c1b7a..4fda260a7 100644
--- a/src/modules/module_12900.c
+++ b/src/modules/module_12900.c
@@ -74,12 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_13000.c b/src/modules/module_13000.c
index 78cb7ca63..eeeae3aa2 100644
--- a/src/modules/module_13000.c
+++ b/src/modules/module_13000.c
@@ -87,12 +87,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c
index 8619a0d19..5882af5a1 100644
--- a/src/modules/module_13100.c
+++ b/src/modules/module_13100.c
@@ -76,7 +76,7 @@ u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_14100.c b/src/modules/module_14100.c
index 86b56ba6f..361eca88b 100644
--- a/src/modules/module_14100.c
+++ b/src/modules/module_14100.c
@@ -111,7 +111,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_14400.c b/src/modules/module_14400.c
index 784e3e30b..15581846c 100644
--- a/src/modules/module_14400.c
+++ b/src/modules/module_14400.c
@@ -59,7 +59,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 1)
     {
diff --git a/src/modules/module_15000.c b/src/modules/module_15000.c
index 3e31238c2..4c2133d67 100644
--- a/src/modules/module_15000.c
+++ b/src/modules/module_15000.c
@@ -63,12 +63,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_15300.c b/src/modules/module_15300.c
index 90fdfad25..04800456e 100644
--- a/src/modules/module_15300.c
+++ b/src/modules/module_15300.c
@@ -100,7 +100,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_15600.c b/src/modules/module_15600.c
index 12b939426..67a9388c6 100644
--- a/src/modules/module_15600.c
+++ b/src/modules/module_15600.c
@@ -90,12 +90,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c
index 40c444fa7..258b1f7b0 100644
--- a/src/modules/module_15700.c
+++ b/src/modules/module_15700.c
@@ -248,7 +248,7 @@ u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UN
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_15900.c b/src/modules/module_15900.c
index f239dc7a7..2b19f5213 100644
--- a/src/modules/module_15900.c
+++ b/src/modules/module_15900.c
@@ -100,7 +100,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_16200.c b/src/modules/module_16200.c
index 827a2d1d4..601156681 100644
--- a/src/modules/module_16200.c
+++ b/src/modules/module_16200.c
@@ -80,12 +80,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_16300.c b/src/modules/module_16300.c
index 4f2fe3752..433a804df 100644
--- a/src/modules/module_16300.c
+++ b/src/modules/module_16300.c
@@ -81,12 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_16700.c b/src/modules/module_16700.c
index 829d32db1..91c20048f 100644
--- a/src/modules/module_16700.c
+++ b/src/modules/module_16700.c
@@ -80,12 +80,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_16900.c b/src/modules/module_16900.c
index e936485ed..b4ef8258a 100644
--- a/src/modules/module_16900.c
+++ b/src/modules/module_16900.c
@@ -91,12 +91,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17300.c b/src/modules/module_17300.c
index 14d6d92c2..0778a8f5d 100644
--- a/src/modules/module_17300.c
+++ b/src/modules/module_17300.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17400.c b/src/modules/module_17400.c
index ecee00700..171c3c5b4 100644
--- a/src/modules/module_17400.c
+++ b/src/modules/module_17400.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17500.c b/src/modules/module_17500.c
index 98ed35e10..9b663af2a 100644
--- a/src/modules/module_17500.c
+++ b/src/modules/module_17500.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17600.c b/src/modules/module_17600.c
index 0c14c6505..9c58a09ec 100644
--- a/src/modules/module_17600.c
+++ b/src/modules/module_17600.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17700.c b/src/modules/module_17700.c
index 698c4ccfb..f4ade222b 100644
--- a/src/modules/module_17700.c
+++ b/src/modules/module_17700.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17800.c b/src/modules/module_17800.c
index 2499549b1..6b68526ac 100644
--- a/src/modules/module_17800.c
+++ b/src/modules/module_17800.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_17900.c b/src/modules/module_17900.c
index 0d37632bb..d11d902fa 100644
--- a/src/modules/module_17900.c
+++ b/src/modules/module_17900.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_18000.c b/src/modules/module_18000.c
index 7bc209743..f74c91160 100644
--- a/src/modules/module_18000.c
+++ b/src/modules/module_18000.c
@@ -48,7 +48,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   char *jit_build_options = NULL;
 
   // -Wpass-failed=transform-warning
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_18100.c b/src/modules/module_18100.c
index 2d4c5041e..69f2d4d32 100644
--- a/src/modules/module_18100.c
+++ b/src/modules/module_18100.c
@@ -53,7 +53,7 @@ int module_build_plain_postprocess (MAYBE_UNUSED const hashconfig_t *hashconfig,
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: Segmentation fault
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     return true;
   }
diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c
index 22b1549dc..c188c8227 100644
--- a/src/modules/module_18200.c
+++ b/src/modules/module_18200.c
@@ -78,7 +78,7 @@ u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: CL_OUT_OF_RESOURCES
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
     {
diff --git a/src/modules/module_18300.c b/src/modules/module_18300.c
index 8602329d1..7c3624133 100644
--- a/src/modules/module_18300.c
+++ b/src/modules/module_18300.c
@@ -80,12 +80,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c
index c1d743e9b..109a3f65c 100644
--- a/src/modules/module_18600.c
+++ b/src/modules/module_18600.c
@@ -82,7 +82,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
     {
       u32 overhead = 0;
 
-      if (device_param->device_vendor_id == VENDOR_ID_NV)
+      if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
       {
         overhead = 4;
       }
@@ -123,7 +123,7 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // OpenCL 1.2 pocl HSTR: pthread-x86_64-pc-linux-gnu-skylake: self-test failed
-  if (device_param->platform_vendor_id == VENDOR_ID_POCL)
+  if (device_param->opencl_platform_vendor_id == VENDOR_ID_POCL)
   {
     return true;
   }
diff --git a/src/modules/module_19100.c b/src/modules/module_19100.c
index 8214f7622..44f6275b4 100644
--- a/src/modules/module_19100.c
+++ b/src/modules/module_19100.c
@@ -61,12 +61,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if ((device_param->device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
+  if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == false))
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/modules/module_19200.c b/src/modules/module_19200.c
index bd768e5f6..77a1c80bf 100644
--- a/src/modules/module_19200.c
+++ b/src/modules/module_19200.c
@@ -63,12 +63,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
-  if (device_param->device_vendor_id == VENDOR_ID_NV)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
 
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
   {
     hc_asprintf (&jit_build_options, "-D NO_UNROLL");
   }
diff --git a/src/monitor.c b/src/monitor.c
index aec2220d3..6317d6f70 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -114,33 +114,33 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
     {
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
-        if ((backend_ctx->devices_param[device_id].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if ((backend_ctx->devices_param[backend_devices_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        const int temperature = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
+        const int temperature = hm_get_temperature_with_devices_idx (hashcat_ctx, backend_devices_idx);
 
         if (temperature > (int) user_options->hwmon_temp_abort)
         {
-          EVENT_DATA (EVENT_MONITOR_TEMP_ABORT, &device_id, sizeof (u32));
+          EVENT_DATA (EVENT_MONITOR_TEMP_ABORT, &backend_devices_idx, sizeof (int));
 
           myabort (hashcat_ctx);
         }
       }
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
         if (device_param->skipped_warning == true) continue;
 
-        const int rc_throttle = hm_get_throttle_with_device_id (hashcat_ctx, device_id);
+        const int rc_throttle = hm_get_throttle_with_devices_idx (hashcat_ctx, backend_devices_idx);
 
         if (rc_throttle == -1) continue;
 
@@ -148,9 +148,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
         {
           slowdown_warnings++;
 
-          if (slowdown_warnings == 1) EVENT_DATA (EVENT_MONITOR_THROTTLE1, &device_id, sizeof (u32));
-          if (slowdown_warnings == 2) EVENT_DATA (EVENT_MONITOR_THROTTLE2, &device_id, sizeof (u32));
-          if (slowdown_warnings == 3) EVENT_DATA (EVENT_MONITOR_THROTTLE3, &device_id, sizeof (u32));
+          if (slowdown_warnings == 1) EVENT_DATA (EVENT_MONITOR_THROTTLE1, &backend_devices_idx, sizeof (int));
+          if (slowdown_warnings == 2) EVENT_DATA (EVENT_MONITOR_THROTTLE2, &backend_devices_idx, sizeof (int));
+          if (slowdown_warnings == 3) EVENT_DATA (EVENT_MONITOR_THROTTLE3, &backend_devices_idx, sizeof (int));
         }
         else
         {
@@ -232,9 +232,9 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 
       hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-      for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+      for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
         if (device_param->skipped == true) continue;
 
@@ -242,11 +242,11 @@ static int monitor (hashcat_ctx_t *hashcat_ctx)
 
         exec_cnt++;
 
-        const double exec = status_get_exec_msec_dev (hashcat_ctx, device_id);
+        const double exec = status_get_exec_msec_dev (hashcat_ctx, backend_devices_idx);
 
         exec_total += exec;
 
-        const int util = hm_get_utilization_with_device_id (hashcat_ctx, device_id);
+        const int util = hm_get_utilization_with_devices_idx (hashcat_ctx, backend_devices_idx);
 
         if (util == -1) continue;
 
diff --git a/src/status.c b/src/status.c
index 3086c6066..5594c2607 100644
--- a/src/status.c
+++ b/src/status.c
@@ -202,30 +202,30 @@ int status_get_device_info_cnt (const hashcat_ctx_t *hashcat_ctx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return backend_ctx->devices_cnt;
+  return backend_ctx->backend_devices_cnt;
 }
 
 int status_get_device_info_active (const hashcat_ctx_t *hashcat_ctx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  return backend_ctx->devices_active;
+  return backend_ctx->backend_devices_active;
 }
 
-bool status_get_skipped_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+bool status_get_skipped_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   return device_param->skipped;
 }
 
-bool status_get_skipped_warning_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+bool status_get_skipped_warning_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   return device_param->skipped_warning;
 }
@@ -833,7 +833,7 @@ int status_get_guess_mask_length (const hashcat_ctx_t *hashcat_ctx)
   return mp_get_length (mask_ctx->mask);
 }
 
-char *status_get_guess_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_guess_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
   const backend_ctx_t        *backend_ctx        = hashcat_ctx->backend_ctx;
@@ -842,7 +842,7 @@ char *status_get_guess_candidates_dev (const hashcat_ctx_t *hashcat_ctx, const i
 
   if (status_ctx->accessible == false) return NULL;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   char *display = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1414,22 +1414,22 @@ double status_get_hashes_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
   double hashes_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    hashes_all_msec += status_get_hashes_msec_dev (hashcat_ctx, device_id);
+    hashes_all_msec += status_get_hashes_msec_dev (hashcat_ctx, backend_devices_idx);
   }
 
   return hashes_all_msec;
 }
 
-double status_get_hashes_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+double status_get_hashes_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1455,7 +1455,7 @@ double status_get_hashes_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int d
   return hashes_dev_msec;
 }
 
-double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   // this function increases accuracy for benchmark modes
 
@@ -1464,7 +1464,7 @@ double status_get_hashes_msec_dev_benchmark (const hashcat_ctx_t *hashcat_ctx, c
   u64    speed_cnt  = 0;
   double speed_msec = 0;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if ((device_param->skipped == false) && (device_param->skipped_warning == false))
   {
@@ -1490,19 +1490,19 @@ double status_get_exec_msec_all (const hashcat_ctx_t *hashcat_ctx)
 
   double exec_all_msec = 0;
 
-  for (u32 device_id = 0; device_id < backend_ctx->devices_cnt; device_id++)
+  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    exec_all_msec += status_get_exec_msec_dev (hashcat_ctx, device_id);
+    exec_all_msec += status_get_exec_msec_dev (hashcat_ctx, backend_devices_idx);
   }
 
   return exec_all_msec;
 }
 
-double status_get_exec_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+double status_get_exec_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   double exec_dev_msec = 0;
 
@@ -1525,9 +1525,9 @@ char *status_get_speed_sec_all (const hashcat_ctx_t *hashcat_ctx)
   return display;
 }
 
-char *status_get_speed_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_speed_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
-  const double hashes_msec_dev = status_get_hashes_msec_dev (hashcat_ctx, device_id);
+  const double hashes_msec_dev = status_get_hashes_msec_dev (hashcat_ctx, backend_devices_idx);
 
   char *display = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1698,11 +1698,11 @@ char *status_get_cpt (const hashcat_ctx_t *hashcat_ctx)
   return cpt;
 }
 
-int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int salt_pos = 0;
 
@@ -1714,11 +1714,11 @@ int status_get_salt_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
   return salt_pos;
 }
 
-int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int innerloop_pos = 0;
 
@@ -1730,11 +1730,11 @@ int status_get_innerloop_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
   return innerloop_pos;
 }
 
-int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int innerloop_left = 0;
 
@@ -1746,11 +1746,11 @@ int status_get_innerloop_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
   return innerloop_left;
 }
 
-int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int iteration_pos = 0;
 
@@ -1762,11 +1762,11 @@ int status_get_iteration_pos_dev (const hashcat_ctx_t *hashcat_ctx, const int de
   return iteration_pos;
 }
 
-int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int iteration_left = 0;
 
@@ -1779,11 +1779,11 @@ int status_get_iteration_left_dev (const hashcat_ctx_t *hashcat_ctx, const int d
 }
 
 #ifdef WITH_BRAIN
-int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int brain_client_id = -1;
 
@@ -1795,11 +1795,11 @@ int status_get_brain_link_client_id_dev (const hashcat_ctx_t *hashcat_ctx, const
   return brain_client_id;
 }
 
-int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   int brain_link_status_dev = 0;
 
@@ -1813,11 +1813,11 @@ int status_get_brain_link_status_dev (const hashcat_ctx_t *hashcat_ctx, const in
   return brain_link_status_dev;
 }
 
-char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1833,11 +1833,11 @@ char *status_get_brain_link_recv_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
   return display;
 }
 
-char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1853,11 +1853,11 @@ char *status_get_brain_link_send_bytes_dev (const hashcat_ctx_t *hashcat_ctx, co
   return display;
 }
 
-char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   u64 brain_link_recv_bytes = 0;
 
@@ -1880,11 +1880,11 @@ char *status_get_brain_link_recv_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
   return display;
 }
 
-char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   u64 brain_link_send_bytes = 0;
 
@@ -1908,11 +1908,11 @@ char *status_get_brain_link_send_bytes_sec_dev (const hashcat_ctx_t *hashcat_ctx
 }
 #endif
 
-char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   char *output_buf = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -1926,12 +1926,12 @@ char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_i
 
   hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-  const int num_temperature = hm_get_temperature_with_device_id ((hashcat_ctx_t *) hashcat_ctx, device_id);
-  const int num_fanspeed    = hm_get_fanspeed_with_device_id    ((hashcat_ctx_t *) hashcat_ctx, device_id);
-  const int num_utilization = hm_get_utilization_with_device_id ((hashcat_ctx_t *) hashcat_ctx, device_id);
-  const int num_corespeed   = hm_get_corespeed_with_device_id   ((hashcat_ctx_t *) hashcat_ctx, device_id);
-  const int num_memoryspeed = hm_get_memoryspeed_with_device_id ((hashcat_ctx_t *) hashcat_ctx, device_id);
-  const int num_buslanes    = hm_get_buslanes_with_device_id    ((hashcat_ctx_t *) hashcat_ctx, device_id);
+  const int num_temperature = hm_get_temperature_with_devices_idx ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
+  const int num_fanspeed    = hm_get_fanspeed_with_devices_idx    ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
+  const int num_utilization = hm_get_utilization_with_devices_idx ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
+  const int num_corespeed   = hm_get_corespeed_with_devices_idx   ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
+  const int num_memoryspeed = hm_get_memoryspeed_with_devices_idx ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
+  const int num_buslanes    = hm_get_buslanes_with_devices_idx    ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
 
   int output_len = 0;
 
@@ -1981,11 +1981,11 @@ char *status_get_hwmon_dev (const hashcat_ctx_t *hashcat_ctx, const int device_i
   return output_buf;
 }
 
-int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return -1;
 
@@ -1995,18 +1995,18 @@ int status_get_corespeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device
 
   hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-  const int num_corespeed = hm_get_corespeed_with_device_id ((hashcat_ctx_t *) hashcat_ctx, device_id);
+  const int num_corespeed = hm_get_corespeed_with_devices_idx ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
 
   hc_thread_mutex_unlock (status_ctx->mux_hwmon);
 
   return num_corespeed;
 }
 
-int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return -1;
 
@@ -2016,18 +2016,18 @@ int status_get_memoryspeed_dev (const hashcat_ctx_t *hashcat_ctx, const int devi
 
   hc_thread_mutex_lock (status_ctx->mux_hwmon);
 
-  const int num_memoryspeed = hm_get_memoryspeed_with_device_id ((hashcat_ctx_t *) hashcat_ctx, device_id);
+  const int num_memoryspeed = hm_get_memoryspeed_with_devices_idx ((hashcat_ctx_t *) hashcat_ctx, backend_devices_idx);
 
   hc_thread_mutex_unlock (status_ctx->mux_hwmon);
 
   return num_memoryspeed;
 }
 
-u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
@@ -2036,11 +2036,11 @@ u64 status_get_progress_dev (const hashcat_ctx_t *hashcat_ctx, const int device_
   return device_param->outerloop_left;
 }
 
-double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
@@ -2049,11 +2049,11 @@ double status_get_runtime_msec_dev (const hashcat_ctx_t *hashcat_ctx, const int
   return device_param->outerloop_msec;
 }
 
-int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
@@ -2064,11 +2064,11 @@ int status_get_kernel_accel_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
   return device_param->kernel_accel;
 }
 
-int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
@@ -2079,11 +2079,11 @@ int status_get_kernel_loops_dev (const hashcat_ctx_t *hashcat_ctx, const int dev
   return device_param->kernel_loops;
 }
 
-int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
@@ -2092,11 +2092,11 @@ int status_get_kernel_threads_dev (const hashcat_ctx_t *hashcat_ctx, const int d
   return device_param->kernel_threads;
 }
 
-int status_get_vector_width_dev (const hashcat_ctx_t *hashcat_ctx, const int device_id)
+int status_get_vector_width_dev (const hashcat_ctx_t *hashcat_ctx, const int backend_devices_idx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
   if (device_param->skipped == true) return 0;
 
diff --git a/src/terminal.c b/src/terminal.c
index 394b724f1..570b4dfea 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -658,61 +658,98 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
-  event_log_info (hashcat_ctx, "OpenCL Info:");
-  event_log_info (hashcat_ctx, NULL);
-
-  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
-  cl_platform_id *platforms             = backend_ctx->platforms;
-  char          **platforms_vendor      = backend_ctx->platforms_vendor;
-  char          **platforms_name        = backend_ctx->platforms_name;
-  char          **platforms_version     = backend_ctx->platforms_version;
-  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
-
-  for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
+  if (backend_ctx->cuda)
   {
-    cl_platform_id platform_id       = platforms[platforms_idx];
-    char          *platform_vendor   = platforms_vendor[platforms_idx];
-    char          *platform_name     = platforms_name[platforms_idx];
-    char          *platform_version  = platforms_version[platforms_idx];
-
-    event_log_info (hashcat_ctx, "Platform ID #%u", platforms_idx + 1);
-    event_log_info (hashcat_ctx, "  Vendor  : %s",  platform_vendor);
-    event_log_info (hashcat_ctx, "  Name    : %s",  platform_name);
-    event_log_info (hashcat_ctx, "  Version : %s",  platform_version);
+    event_log_info (hashcat_ctx, "CUDA Info:");
+    event_log_info (hashcat_ctx, "==========");
     event_log_info (hashcat_ctx, NULL);
 
-    for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
+    int cuda_devices_cnt    = backend_ctx->cuda_devices_cnt;
+    int cuda_driver_version = backend_ctx->cuda_driver_version;
+
+    event_log_info (hashcat_ctx, "  CUDA.Version.: %d.%d", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
+    event_log_info (hashcat_ctx, NULL);
+
+    for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++)
     {
-      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
+      const int backend_devices_idx = backend_ctx->backend_device_from_cuda[cuda_devices_idx];
 
-      if (device_param->platform != platform_id) continue;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
 
-      cl_device_type opencl_device_type         = device_param->opencl_device_type;
-      cl_uint        device_vendor_id           = device_param->device_vendor_id;
-      char          *device_vendor              = device_param->device_vendor;
-      char          *device_name                = device_param->device_name;
-      u32            device_processors          = device_param->device_processors;
-      u32            device_maxclock_frequency  = device_param->device_maxclock_frequency;
-      u64            device_maxmem_alloc        = device_param->device_maxmem_alloc;
-      u64            device_global_mem          = device_param->device_global_mem;
-      char          *device_opencl_version      = device_param->device_opencl_version;
-      char          *device_version             = device_param->device_version;
-      char          *driver_version             = device_param->driver_version;
+      int   device_id                 = device_param->device_id;
+      char *device_name               = device_param->device_name;
+      u32   device_processors         = device_param->device_processors;
+      u32   device_maxclock_frequency = device_param->device_maxclock_frequency;
+      u64   device_global_mem         = device_param->device_global_mem;
 
-      event_log_info (hashcat_ctx, "  Device ID #%u",         devices_idx + 1);
-      event_log_info (hashcat_ctx, "    Type           : %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
-      event_log_info (hashcat_ctx, "    Vendor ID      : %u", device_vendor_id);
-      event_log_info (hashcat_ctx, "    Vendor         : %s", device_vendor);
-      event_log_info (hashcat_ctx, "    Name           : %s", device_name);
-      event_log_info (hashcat_ctx, "    Version        : %s", device_version);
-      event_log_info (hashcat_ctx, "    Processor(s)   : %u", device_processors);
-      event_log_info (hashcat_ctx, "    Clock          : %u", device_maxclock_frequency);
-      event_log_info (hashcat_ctx, "    Memory         : %" PRIu64 "/%" PRIu64 " MB allocatable", device_maxmem_alloc / 1024 / 1024, device_global_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "    OpenCL Version : %s", device_opencl_version);
-      event_log_info (hashcat_ctx, "    Driver Version : %s", driver_version);
+      event_log_info (hashcat_ctx, "  Backend Device ID #%d", device_id + 1);
+      event_log_info (hashcat_ctx, "    Name...........: %s", device_name);
+      event_log_info (hashcat_ctx, "    Processor(s)...: %u", device_processors);
+      event_log_info (hashcat_ctx, "    Clock..........: %u", device_maxclock_frequency);
+      event_log_info (hashcat_ctx, "    Memory.........: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
       event_log_info (hashcat_ctx, NULL);
     }
   }
+
+  if (backend_ctx->ocl)
+  {
+    event_log_info (hashcat_ctx, "OpenCL Info:");
+    event_log_info (hashcat_ctx, "============");
+    event_log_info (hashcat_ctx, NULL);
+
+    cl_uint   opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
+    cl_uint  *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
+    char    **opencl_platforms_name        = backend_ctx->opencl_platforms_name;
+    char    **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
+    char    **opencl_platforms_version     = backend_ctx->opencl_platforms_version;
+
+    for (cl_uint opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
+    {
+      char     *opencl_platform_vendor       = opencl_platforms_vendor[opencl_platforms_idx];
+      char     *opencl_platform_name         = opencl_platforms_name[opencl_platforms_idx];
+      char     *opencl_platform_version      = opencl_platforms_version[opencl_platforms_idx];
+      cl_uint   opencl_platform_devices_cnt  = opencl_platforms_devices_cnt[opencl_platforms_idx];
+
+      event_log_info (hashcat_ctx, "OpenCL Platform ID #%u", opencl_platforms_idx + 1);
+      event_log_info (hashcat_ctx, "  Vendor..: %s",  opencl_platform_vendor);
+      event_log_info (hashcat_ctx, "  Name....: %s",  opencl_platform_name);
+      event_log_info (hashcat_ctx, "  Version.: %s",  opencl_platform_version);
+      event_log_info (hashcat_ctx, NULL);
+
+      for (cl_uint opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
+      {
+        const int backend_devices_idx = backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx];
+
+        const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
+
+        int            device_id                  = device_param->device_id;
+        char          *device_name                = device_param->device_name;
+        u32            device_processors          = device_param->device_processors;
+        u32            device_maxclock_frequency  = device_param->device_maxclock_frequency;
+        u64            device_maxmem_alloc        = device_param->device_maxmem_alloc;
+        u64            device_global_mem          = device_param->device_global_mem;
+        cl_device_type opencl_device_type         = device_param->opencl_device_type;
+        cl_uint        opencl_device_vendor_id    = device_param->opencl_device_vendor_id;
+        char          *opencl_device_vendor       = device_param->opencl_device_vendor;
+        char          *opencl_device_c_version    = device_param->opencl_device_c_version;
+        char          *opencl_device_version      = device_param->opencl_device_version;
+        char          *opencl_driver_version      = device_param->opencl_driver_version;
+
+        event_log_info (hashcat_ctx, "  Backend Device ID #%d", device_id + 1);
+        event_log_info (hashcat_ctx, "    Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+        event_log_info (hashcat_ctx, "    Vendor.ID......: %u", opencl_device_vendor_id);
+        event_log_info (hashcat_ctx, "    Vendor.........: %s", opencl_device_vendor);
+        event_log_info (hashcat_ctx, "    Name...........: %s", device_name);
+        event_log_info (hashcat_ctx, "    Version........: %s", opencl_device_version);
+        event_log_info (hashcat_ctx, "    Processor(s)...: %u", device_processors);
+        event_log_info (hashcat_ctx, "    Clock..........: %u", device_maxclock_frequency);
+        event_log_info (hashcat_ctx, "    Memory.........: %" PRIu64 "/%" PRIu64 " MB allocatable", device_maxmem_alloc / 1024 / 1024, device_global_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "    OpenCL.Version.: %s", opencl_device_c_version);
+        event_log_info (hashcat_ctx, "    Driver.Version.: %s", opencl_driver_version);
+        event_log_info (hashcat_ctx, NULL);
+      }
+    }
+  }
 }
 
 void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
@@ -724,21 +761,21 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
   if (user_options->machine_readable == true) return;
   if (user_options->status_json      == true) return;
 
-  cl_uint         platforms_cnt         = backend_ctx->platforms_cnt;
-  cl_platform_id *platforms             = backend_ctx->platforms;
-  char          **platforms_vendor      = backend_ctx->platforms_vendor;
-  bool           *platforms_skipped     = backend_ctx->platforms_skipped;
-  cl_uint         devices_cnt           = backend_ctx->devices_cnt;
+  cl_uint         opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
+  cl_platform_id *opencl_platforms             = backend_ctx->opencl_platforms;
+  char          **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
+  bool           *opencl_platforms_skipped     = backend_ctx->opencl_platforms_skipped;
+  cl_uint         opencl_devices_cnt           = backend_ctx->opencl_devices_cnt;
 
-  for (cl_uint platforms_idx = 0; platforms_idx < platforms_cnt; platforms_idx++)
+  for (cl_uint opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
   {
-    cl_platform_id platform_id       = platforms[platforms_idx];
-    char          *platform_vendor   = platforms_vendor[platforms_idx];
-    bool           platform_skipped  = platforms_skipped[platforms_idx];
+    cl_platform_id opencl_platform_id       = opencl_platforms[opencl_platforms_idx];
+    char          *opencl_platform_vendor   = opencl_platforms_vendor[opencl_platforms_idx];
+    bool           opencl_platform_skipped  = opencl_platforms_skipped[opencl_platforms_idx];
 
-    if (platform_skipped == false)
+    if (opencl_platform_skipped == false)
     {
-      const size_t len = event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s", platforms_idx + 1, platform_vendor);
+      const size_t len = event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s", opencl_platforms_idx + 1, opencl_platform_vendor);
 
       char line[HCBUFSIZ_TINY];
 
@@ -750,14 +787,14 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
     }
     else
     {
-      event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s, skipped or no OpenCL compatible devices found.", platforms_idx + 1, platform_vendor);
+      event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s, skipped or no OpenCL compatible devices found.", opencl_platforms_idx + 1, opencl_platform_vendor);
     }
 
-    for (cl_uint devices_idx = 0; devices_idx < devices_cnt; devices_idx++)
+    for (cl_uint opencl_devices_idx = 0; opencl_devices_idx < opencl_devices_cnt; opencl_devices_idx++)
     {
-      const hc_device_param_t *device_param = backend_ctx->devices_param + devices_idx;
+      const hc_device_param_t *device_param = backend_ctx->devices_param + opencl_devices_idx;
 
-      if (device_param->platform != platform_id) continue;
+      if (device_param->opencl_platform != opencl_platform_id) continue;
 
       char *device_name         = device_param->device_name;
       u32   device_processors   = device_param->device_processors;
@@ -767,7 +804,7 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
       if ((device_param->skipped == false) && (device_param->skipped_warning == false))
       {
         event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 "/%" PRIu64 " MB allocatable, %uMCU",
-                  devices_idx + 1,
+                  opencl_devices_idx + 1,
                   device_name,
                   device_maxmem_alloc / 1024 / 1024,
                   device_global_mem   / 1024 / 1024,
@@ -776,7 +813,7 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
       else
       {
         event_log_info (hashcat_ctx, "* Device #%u: %s, skipped.",
-                  devices_idx + 1,
+                  opencl_devices_idx + 1,
                   device_name);
       }
     }
@@ -851,7 +888,7 @@ void status_display_machine_readable (hashcat_ctx_t *hashcat_ctx)
 
       if (device_info->skipped_warning_dev == true) continue;
 
-      const int temp = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
+      const int temp = hm_get_temperature_with_devices_idx (hashcat_ctx, device_id);
 
       printf ("%d\t", temp);
     }
@@ -871,7 +908,7 @@ void status_display_machine_readable (hashcat_ctx_t *hashcat_ctx)
 
     // ok, little cheat here again...
 
-    const int util = hm_get_utilization_with_device_id (hashcat_ctx, device_id);
+    const int util = hm_get_utilization_with_devices_idx (hashcat_ctx, device_id);
 
     printf ("%d\t", util);
   }
@@ -948,12 +985,12 @@ void status_display_status_json (hashcat_ctx_t *hashcat_ctx)
 
     if (hwmon_ctx->enabled == true)
     {
-      const int temp = hm_get_temperature_with_device_id (hashcat_ctx, device_id);
+      const int temp = hm_get_temperature_with_devices_idx (hashcat_ctx, device_id);
 
       printf (" \"temp\": %d,", temp);
     }
 
-    const int util = hm_get_utilization_with_device_id (hashcat_ctx, device_id);
+    const int util = hm_get_utilization_with_devices_idx (hashcat_ctx, device_id);
 
     printf (" \"util\": %d }", util);
 

From 6fd936b43a1aa18207727dfda49b1c878087defb Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 30 Apr 2019 16:24:13 +0200
Subject: [PATCH 16/73] Removed --opencl-platforms filter in order to force
 backend device numbers to stay constant

---
 docs/changes.txt                |   1 +
 extra/tab_completion/hashcat.sh |  32 +--
 include/types.h                 |   4 -
 src/Makefile                    |   2 +-
 src/backend.c                   | 411 ++++++++++++--------------------
 src/terminal.c                  | 141 +++++++----
 src/usage.c                     |   1 -
 src/user_options.c              |  25 --
 8 files changed, 250 insertions(+), 367 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index d18141fc4..8879ee2c0 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -74,6 +74,7 @@
 - Mode 16800/16801 hash format: Changed separator character from '*' to ':'
 - Requirements: Update runtime check for minimum NVIDIA driver version from 367.x to 418.56 or later
 - Requirements: Add new requirement for NVIDIA GPU: CUDA Toolkit (10.1 or later)
+- OpenCL Options: Removed --opencl-platforms filter in order to force backend device numbers to stay constant
 
 * changes v5.0.0 -> v5.1.0
 
diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh
index 2932ddad1..8243e7f77 100644
--- a/extra/tab_completion/hashcat.sh
+++ b/extra/tab_completion/hashcat.sh
@@ -189,8 +189,8 @@ _hashcat ()
   local BUILD_IN_CHARSETS='?l ?u ?d ?a ?b ?s ?h ?H'
 
   local SHORT_OPTS="-m -a -V -v -h -b -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z"
-  local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --opencl-info --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable  --slow-candidates --brain-server --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password"
-  local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --markov-hcstat2 --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-devices --opencl-platforms --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-whitelist-session --stdin-timeout-abort"
+  local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --opencl-info --opencl-devices --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable  --slow-candidates --brain-server --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password"
+  local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --markov-hcstat2 --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-devices --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-whitelist-session --stdin-timeout-abort"
 
   COMPREPLY=()
   local cur="${COMP_WORDS[COMP_CWORD]}"
@@ -270,34 +270,6 @@ _hashcat ()
       return 0
       ;;
 
-    --opencl-platforms)
-      local icd_list=$(ls -1 /etc/OpenCL/vendors/*.icd 2> /dev/null)
-
-      local architecture=$(getconf LONG_BIT 2> /dev/null)
-
-      if [ -z "${architecture}" ]; then
-        return 0
-      fi
-
-      # filter the icd_list (do not show 32 bit on 64bit systems and vice versa)
-
-      if [ "${architecture}" -eq 64 ]; then
-
-        icd_list=$(echo "${icd_list}" | grep -v "32.icd")
-
-      else
-
-        icd_list=$(echo "${icd_list}" | grep -v "64.icd")
-
-      fi
-
-      local number_icds=$(seq 1 $(echo "${icd_list}" | wc -l))
-
-      COMPREPLY=($(compgen -W "${number_icds}" -- ${cur}))
-
-      return 0
-      ;;
-
     --cpu-affinity)
       _hashcat_cpu_devices
       local num_devices=${?}
diff --git a/include/types.h b/include/types.h
index ba8b0ea78..3317f37cd 100644
--- a/include/types.h
+++ b/include/types.h
@@ -693,7 +693,6 @@ typedef enum user_options_map
   IDX_OPENCL_DEVICES            = 'd',
   IDX_OPENCL_DEVICE_TYPES       = 'D',
   IDX_OPENCL_INFO               = 'I',
-  IDX_OPENCL_PLATFORMS          = 0xff26,
   IDX_OPENCL_VECTOR_WIDTH       = 0xff27,
   IDX_OPTIMIZED_KERNEL_ENABLE   = 'O',
   IDX_OUTFILE_AUTOHEX_DISABLE   = 0xff28,
@@ -1389,11 +1388,9 @@ typedef struct backend_ctx
   cl_device_id      **opencl_platforms_devices;
   cl_uint            *opencl_platforms_devices_cnt;
   char              **opencl_platforms_name;
-  bool               *opencl_platforms_skipped;
   char              **opencl_platforms_vendor;
   char              **opencl_platforms_version;
 
-  u64                 opencl_platforms_filter;
   cl_device_type      opencl_device_types_filter;
 
 } backend_ctx_t;
@@ -1783,7 +1780,6 @@ typedef struct user_options
   char        *markov_hcstat2;
   char        *opencl_devices;
   char        *opencl_device_types;
-  char        *opencl_platforms;
   char        *outfile;
   char        *outfile_check_dir;
   char        *potfile_path;
diff --git a/src/Makefile b/src/Makefile
index 9ffea7d67..3f90fafe0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,7 +4,7 @@
 ##
 
 SHARED                  := 0
-DEBUG                   := 1
+DEBUG                   := 0
 PRODUCTION              := 0
 PRODUCTION_VERSION      := v5.1.0
 ENABLE_BRAIN            := 1
diff --git a/src/backend.c b/src/backend.c
index 1dddf5c53..9a24b81fe 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -37,6 +37,74 @@ static const u32 full80 = 0x80808080;
 
 static double TARGET_MSEC_PROFILE[4] = { 2, 12, 96, 480 };
 
+static bool is_same_device (const hc_device_param_t *src, const hc_device_param_t *dst)
+{
+  // both entries count as the same device only if pcie_bus, pcie_device and pcie_function all match
+
+  const bool same_bus_and_device = (src->pcie_bus == dst->pcie_bus) && (src->pcie_device == dst->pcie_device);
+
+  return same_bus_and_device && (src->pcie_function == dst->pcie_function);
+}
+
+static int backend_ctx_find_duplicate_devices (hashcat_ctx_t *hashcat_ctx)
+{
+  // flags every later devices_param[] entry as skipped when is_same_device() matches an earlier active entry
+
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  for (int src_idx = 0; src_idx < backend_ctx->backend_devices_cnt; src_idx++)
+  {
+    hc_device_param_t *src = backend_ctx->devices_param + src_idx;
+
+    // entries already skipped (or skipped with a warning) are never used as the reference device
+
+    if ((src->skipped == true) || (src->skipped_warning == true)) continue;
+
+    for (int dst_idx = src_idx + 1; dst_idx < backend_ctx->backend_devices_cnt; dst_idx++)
+    {
+      hc_device_param_t *dst = backend_ctx->devices_param + dst_idx;
+
+      if ((dst->skipped == true) || (dst->skipped_warning == true)) continue;
+
+      // the first occurrence stays enabled, the later duplicate gets disabled
+      if (is_same_device (src, dst) == true) dst->skipped = true;
+    }
+  }
+
+  // NOTE(review): -1 is returned unconditionally; confirm that callers do not treat it as a failure
+  return -1;
+}
+
+static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
+{
+  // returns true only if the name, the backend flags and every property compared below are equal
+  if (strcmp (src->device_name, dst->device_name) != 0) return false;
+
+  if (src->is_cuda   != dst->is_cuda)   return false;
+  if (src->is_opencl != dst->is_opencl) return false;
+
+  // opencl_* strings are OpenCL properties (NOTE(review): presumably unset for CUDA devices), so guard on is_opencl
+  if (src->is_opencl == true)
+  {
+    if (strcmp (src->opencl_device_vendor,  dst->opencl_device_vendor)  != 0) return false;
+    if (strcmp (src->opencl_device_version, dst->opencl_device_version) != 0) return false;
+    if (strcmp (src->opencl_driver_version, dst->opencl_driver_version) != 0) return false;
+  }
+
+  if (src->device_processors         != dst->device_processors)         return false;
+  if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
+  if (src->device_maxworkgroup_size  != dst->device_maxworkgroup_size)  return false;
+  // memory size can be different, depending on which gpu has a monitor connected
+  // if (src->device_maxmem_alloc       != dst->device_maxmem_alloc)       return false;
+  // if (src->device_global_mem         != dst->device_global_mem)         return false;
+
+  if (src->sm_major != dst->sm_major) return false;
+  if (src->sm_minor != dst->sm_minor) return false;
+  if (src->kernel_exec_timeout != dst->kernel_exec_timeout) return false;
+
+  return true;
+}
+
 static int ocl_check_dri (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx)
 {
   #if defined (__linux__)
@@ -148,49 +216,6 @@ static bool setup_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl
   return true;
 }
 
-static bool setup_opencl_platforms_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_platforms, u64 *out)
-{
-  u64 opencl_platforms_filter = 0;
-
-  if (opencl_platforms)
-  {
-    char *platforms = hcstrdup (opencl_platforms);
-
-    if (platforms == NULL) return false;
-
-    char *saveptr = NULL;
-
-    char *next = strtok_r (platforms, ",", &saveptr);
-
-    do
-    {
-      const int platform = (const int) strtol (next, NULL, 10);
-
-      if (platform <= 0 || platform >= 64)
-      {
-        event_log_error (hashcat_ctx, "Invalid OpenCL platform %d specified.", platform);
-
-        hcfree (platforms);
-
-        return false;
-      }
-
-      opencl_platforms_filter |= 1ULL << (platform - 1);
-
-    } while ((next = strtok_r ((char *) NULL, ",", &saveptr)) != NULL);
-
-    hcfree (platforms);
-  }
-  else
-  {
-    opencl_platforms_filter = -1ULL;
-  }
-
-  *out = opencl_platforms_filter;
-
-  return true;
-}
-
 static bool setup_opencl_device_types_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_device_types, cl_device_type *out)
 {
   cl_device_type opencl_device_types_filter = 0;
@@ -567,18 +592,7 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
   if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("libnvrtc.so.1");
   #endif
 
-  if (nvrtc->lib == NULL)
-  {
-    event_log_error (hashcat_ctx, "Cannot find NVRTC library.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA SDK and/or driver for your platform.");
-    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
-    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
-    event_log_warning (hashcat_ctx, NULL);
-
-    return -1;
-  }
+  if (nvrtc->lib == NULL) return -1;
 
   HC_LOAD_FUNC (nvrtc, nvrtcAddNameExpression,  NVRTC_NVRTCADDNAMEEXPRESSION, NVRTC, 1);
   HC_LOAD_FUNC (nvrtc, nvrtcCompileProgram,     NVRTC_NVRTCCOMPILEPROGRAM,    NVRTC, 1);
@@ -763,18 +777,7 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
   if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1");
   #endif
 
-  if (cuda->lib == NULL)
-  {
-    event_log_error (hashcat_ctx, "Cannot find CUDA library.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the native CUDA runtime or driver for your platform.");
-    event_log_warning (hashcat_ctx, "NVIDIA GPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
-    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
-    event_log_warning (hashcat_ctx, NULL);
-
-    return -1;
-  }
+  if (cuda->lib == NULL) return -1;
 
   HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1);
@@ -1064,39 +1067,7 @@ int ocl_init (hashcat_ctx_t *hashcat_ctx)
   if (ocl->lib == NULL) ocl->lib = hc_dlopen ("libOpenCL.so.1");
   #endif
 
-  if (ocl->lib == NULL)
-  {
-    event_log_error (hashcat_ctx, "Cannot find an OpenCL ICD loader library.");
-
-    event_log_warning (hashcat_ctx, "You are probably missing the native OpenCL runtime or driver for your platform.");
-    event_log_warning (hashcat_ctx, NULL);
-
-    #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
-    #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
-    #endif
-
-    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
-
-    #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
-    #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
-    #endif
-
-    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
-    event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
-    event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
-    event_log_warning (hashcat_ctx, NULL);
-
-    return -1;
-  }
+  if (ocl->lib == NULL) return -1;
 
   HC_LOAD_FUNC (ocl, clBuildProgram,            OCL_CLBUILDPROGRAM,             OpenCL, 1);
   HC_LOAD_FUNC (ocl, clCreateBuffer,            OCL_CLCREATEBUFFER,             OpenCL, 1);
@@ -3457,7 +3428,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->cuda = cuda;
 
-  const int rc_cuda_init = cuda_init (hashcat_ctx);
+  int rc_cuda_init = cuda_init (hashcat_ctx);
 
   if (rc_cuda_init == -1)
   {
@@ -3472,7 +3443,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->nvrtc = nvrtc;
 
-  const int rc_nvrtc_init = nvrtc_init (hashcat_ctx);
+  int rc_nvrtc_init = nvrtc_init (hashcat_ctx);
 
   if (rc_nvrtc_init == -1)
   {
@@ -3497,6 +3468,9 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   }
   else
   {
+    rc_cuda_init  = -1;
+    rc_nvrtc_init = -1;
+
     cuda_close  (hashcat_ctx);
     nvrtc_close (hashcat_ctx);
   }
@@ -3522,31 +3496,31 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if ((rc_cuda_init == -1) && (rc_ocl_init == -1))
   {
-    event_log_error (hashcat_ctx, "ATTENTION! No CUDA or OpenCL installation found.");
+    event_log_error (hashcat_ctx, "ATTENTION! No OpenCL or CUDA installation found.");
 
     event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation.");
     event_log_warning (hashcat_ctx, NULL);
 
     #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
     event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
     #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
     event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
     #endif
 
-    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:");
     event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
 
     #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this driver:");
     event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
     #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this driver:");
     event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
     #endif
 
-    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver (both):");
     event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
     event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
@@ -3563,7 +3537,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (rc_ocl_check == -1) return -1;
 
   /**
-   * OpenCL device selection
+   * OpenCL device selection (tbd rename)
    */
 
   u64 devices_filter;
@@ -3574,6 +3548,18 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->devices_filter = devices_filter;
 
+  /**
+   * OpenCL device type selection (tbd rename)
+   */
+
+  cl_device_type opencl_device_types_filter;
+
+  const bool rc_opencl_device_types_filter = setup_opencl_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &opencl_device_types_filter);
+
+  if (rc_opencl_device_types_filter == false) return -1;
+
+  backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
+
   /**
    * CUDA API: init
    */
@@ -3600,7 +3586,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
       hcfree (opencl_platforms_devices);      \
       hcfree (opencl_platforms_devices_cnt);  \
       hcfree (opencl_platforms_name);         \
-      hcfree (opencl_platforms_skipped);      \
       hcfree (opencl_platforms_vendor);       \
       hcfree (opencl_platforms_version);      \
     }
@@ -3610,7 +3595,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     cl_device_id  **opencl_platforms_devices     = (cl_device_id **)  hccalloc (CL_PLATFORMS_MAX, sizeof (cl_device_id *));
     cl_uint        *opencl_platforms_devices_cnt = (cl_uint *)        hccalloc (CL_PLATFORMS_MAX, sizeof (cl_uint));
     char          **opencl_platforms_name        = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
-    bool           *opencl_platforms_skipped     = (bool *)           hccalloc (CL_PLATFORMS_MAX, sizeof (bool));
     char          **opencl_platforms_vendor      = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
     char          **opencl_platforms_version     = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
 
@@ -3627,45 +3611,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if (opencl_platforms_cnt)
     {
-      /**
-       * OpenCL platform selection
-       */
-
-      u64 opencl_platforms_filter;
-
-      const bool rc_platforms_filter = setup_opencl_platforms_filter (hashcat_ctx, user_options->opencl_platforms, &opencl_platforms_filter);
-
-      if (rc_platforms_filter == false) return -1;
-
-      backend_ctx->opencl_platforms_filter = opencl_platforms_filter;
-
-      if (opencl_platforms_filter != (u64) -1)
-      {
-        u64 opencl_platform_cnt_mask = ~(((u64) -1 >> opencl_platforms_cnt) << opencl_platforms_cnt);
-
-        if (opencl_platforms_filter > opencl_platform_cnt_mask)
-        {
-          event_log_error (hashcat_ctx, "An invalid platform was specified using the --opencl-platforms parameter.");
-          event_log_error (hashcat_ctx, "The specified platform was higher than the number of available platforms (%u).", opencl_platforms_cnt);
-
-          FREE_OPENCL_CTX_ON_ERROR;
-
-          return -1;
-        }
-      }
-
-      /**
-       * OpenCL device type selection
-       */
-
-      cl_device_type opencl_device_types_filter;
-
-      const bool rc_opencl_device_types_filter = setup_opencl_device_types_filter (hashcat_ctx, user_options->opencl_device_types, &opencl_device_types_filter);
-
-      if (rc_opencl_device_types_filter == false) return -1;
-
-      backend_ctx->opencl_device_types_filter = opencl_device_types_filter;
-
       if (user_options->opencl_device_types == NULL)
       {
         /**
@@ -3694,13 +3639,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
             continue;
           }
 
-          if ((opencl_platforms_filter & (1ULL << opencl_platforms_idx)) == 0)
-          {
-            hcfree (opencl_platform_devices);
-
-            continue;
-          }
-
           for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
           {
             cl_device_id opencl_device = opencl_platform_devices[opencl_platform_devices_idx];
@@ -3749,7 +3687,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     backend_ctx->opencl_platforms_devices     = opencl_platforms_devices;
     backend_ctx->opencl_platforms_devices_cnt = opencl_platforms_devices_cnt;
     backend_ctx->opencl_platforms_name        = opencl_platforms_name;
-    backend_ctx->opencl_platforms_skipped     = opencl_platforms_skipped;
     backend_ctx->opencl_platforms_vendor      = opencl_platforms_vendor;
     backend_ctx->opencl_platforms_version     = opencl_platforms_version;
 
@@ -3762,31 +3699,31 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if ((backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL))
   {
-    event_log_error (hashcat_ctx, "ATTENTION! No CUDA-compatible or OpenCL-compatible platform found.");
+    event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible or CUDA-compatible platform found.");
 
-    event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation.");
+    event_log_warning (hashcat_ctx, "You are probably missing the OpenCL or CUDA runtime installation.");
     event_log_warning (hashcat_ctx, NULL);
 
     #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:");
     event_log_warning (hashcat_ctx, "  \"RadeonOpenCompute (ROCm)\" Software Platform (1.6.180 or later)");
     #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:");
     event_log_warning (hashcat_ctx, "  \"AMD Radeon Software Crimson Edition\" (15.12 or later)");
     #endif
 
-    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:");
     event_log_warning (hashcat_ctx, "  \"OpenCL Runtime for Intel Core and Intel Xeon Processors\" (16.1.1 or later)");
 
     #if defined (__linux__)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Linux require this driver:");
     event_log_warning (hashcat_ctx, "  \"OpenCL 2.0 GPU Driver Package for Linux\" (2.0 or later)");
     #elif defined (_WIN)
-    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* Intel GPUs on Windows require this driver:");
     event_log_warning (hashcat_ctx, "  \"OpenCL Driver for Intel Iris and Intel HD Graphics\"");
     #endif
 
-    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver:");
+    event_log_warning (hashcat_ctx, "* NVIDIA GPUs require this runtime and/or driver (both):");
     event_log_warning (hashcat_ctx, "  \"NVIDIA Driver\" (418.56 or later)");
     event_log_warning (hashcat_ctx, "  \"CUDA Toolkit\" (10.1 or later)");
     event_log_warning (hashcat_ctx, NULL);
@@ -3815,7 +3752,6 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
   hcfree (backend_ctx->opencl_platforms_devices);
   hcfree (backend_ctx->opencl_platforms_devices_cnt);
   hcfree (backend_ctx->opencl_platforms_name);
-  hcfree (backend_ctx->opencl_platforms_skipped);
   hcfree (backend_ctx->opencl_platforms_vendor);
   hcfree (backend_ctx->opencl_platforms_version);
 
@@ -3987,7 +3923,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (max_shared_memory_per_block < 32768)
       {
-        event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", backend_devices_idx + 1);
+        event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", device_id + 1);
 
         device_param->skipped = true;
       }
@@ -4002,7 +3938,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (device_max_constant_buffer_size < 65536)
       {
-        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", backend_devices_idx + 1);
+        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
 
         device_param->skipped = true;
       }
@@ -4013,10 +3949,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->device_local_mem_type = device_local_mem_type;
 
-      //
+      // skipped
+
+      if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
+      {
+        device_param->skipped = true;
+      }
+
+      if ((backend_ctx->opencl_device_types_filter & CL_DEVICE_TYPE_GPU) == 0)
+      {
+        device_param->skipped = true;
+      }
 
 
-      device_param->skipped = true; // while developing
     }
   }
 
@@ -4037,7 +3982,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     cl_device_id  **opencl_platforms_devices     = backend_ctx->opencl_platforms_devices;
     cl_uint        *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
     char          **opencl_platforms_name        = backend_ctx->opencl_platforms_name;
-    bool           *opencl_platforms_skipped     = backend_ctx->opencl_platforms_skipped;
     char          **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
     char          **opencl_platforms_version     = backend_ctx->opencl_platforms_version;
 
@@ -4138,24 +4082,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         opencl_platform_vendor_id = VENDOR_ID_GENERIC;
       }
 
-      if (user_options->force == false)
-      {
-        if (opencl_platform_vendor_id == VENDOR_ID_MESA)
-        {
-          event_log_error (hashcat_ctx, "Mesa (Gallium) OpenCL platform detected!");
-
-          event_log_warning (hashcat_ctx, "The Mesa platform can cause errors that are often mistaken for bugs in hashcat.");
-          event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the drivers listed in docs/readme.txt.");
-          event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
-          event_log_warning (hashcat_ctx, "You can also use --opencl-platforms to skip the Mesa platform(s).");
-          event_log_warning (hashcat_ctx, NULL);
-
-          return -1;
-        }
-      }
-
-      bool opencl_platform_skipped = ((backend_ctx->opencl_platforms_filter & (1ULL << opencl_platform_idx)) == 0);
-
       cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
 
       cl_uint opencl_platform_devices_cnt = 0;
@@ -4164,21 +4090,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (CL_rc == -1)
       {
-        //event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
+        event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
 
-        //return -1;
-
-        opencl_platform_skipped = true;
+        return -1;
       }
 
       opencl_platforms_devices[opencl_platform_idx] = opencl_platform_devices;
 
       opencl_platforms_devices_cnt[opencl_platform_idx] = opencl_platform_devices_cnt;
 
-      opencl_platforms_skipped[opencl_platform_idx] = opencl_platform_skipped;
-
-      //if (opencl_platform_skipped == true) continue;
-
       for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++)
       {
         const u32 device_id = backend_devices_idx;
@@ -4394,7 +4314,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_endian_little == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not little-endian.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4409,7 +4329,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_available == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device is not available.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4424,7 +4344,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_compiler_available == CL_FALSE)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: No compiler is available for this device.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4439,7 +4359,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support executing kernels.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4460,14 +4380,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (strstr (device_extensions, "base_atomics") == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support base atomics.", device_id + 1);
 
           device_param->skipped = true;
         }
 
         if (strstr (device_extensions, "byte_addressable_store") == 0)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device does not support byte-addressable store.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4484,7 +4404,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_max_constant_buffer_size < 65536)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device's constant buffer size is too small.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4499,7 +4419,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_local_mem_size < 32768)
         {
-          event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", backend_devices_idx + 1);
+          event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
 
           device_param->skipped = true;
         }
@@ -4528,7 +4448,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           {
             if (user_options->force == false)
             {
-              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", backend_devices_idx + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime. Expect massive speed loss.", device_id + 1);
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
               device_param->skipped = true;
@@ -4548,7 +4468,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           {
             if (user_options->force == false)
             {
-              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", backend_devices_idx + 1);
+              if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Intel's OpenCL runtime (GPU only) is currently broken.", device_id + 1);
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             We are waiting for updated OpenCL drivers from Intel.");
               if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
@@ -4711,7 +4631,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (intel_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken Intel OpenCL runtime '%s' detected!", device_id + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
                   event_log_warning (hashcat_ctx, "See hashcat.net for officially supported NVIDIA drivers.");
@@ -4749,7 +4669,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (amd_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken AMD driver '%s' detected!", device_id + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported AMD driver.");
                   event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD drivers.");
@@ -4800,7 +4720,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (nv_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", backend_devices_idx + 1, device_param->opencl_driver_version);
+                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
                   event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers.");
@@ -4813,14 +4733,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (device_param->sm_major < 5)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", backend_devices_idx + 1, device_param->sm_major, device_param->sm_minor);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
                 }
 
                 if (device_param->kernel_exec_timeout != 0)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", backend_devices_idx + 1);
+                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
                   if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
                 }
@@ -4828,7 +4748,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
               if ((strstr (device_param->opencl_device_c_version, "beignet")) || (strstr (device_param->opencl_device_version, "beignet")))
               {
-                event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", backend_devices_idx + 1);
+                event_log_error (hashcat_ctx, "* Device #%u: Intel beignet driver detected!", device_id + 1);
 
                 event_log_warning (hashcat_ctx, "The beignet driver has been marked as likely to fail kernel compilation.");
                 event_log_warning (hashcat_ctx, "You can use --force to override this, but do not report related errors.");
@@ -5018,6 +4938,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     return -1;
   }
 
+  // find duplicate devices (typically cuda and opencl!)
+
+  if (user_options->force == false)
+  {
+    backend_ctx_find_duplicate_devices (hashcat_ctx);
+  }
+
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
   if (backend_ctx->devices_filter != (u64) -1)
@@ -5086,36 +5013,6 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->need_sysfs  = false;
 }
 
-static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
-{
-  if (strcmp (src->device_name, dst->device_name) != 0) return false;
-
-  if (src->is_cuda   != dst->is_cuda)   return false;
-  if (src->is_opencl != dst->is_opencl) return false;
-
-  if (src->is_cuda == true)
-  {
-    if (strcmp (src->opencl_device_vendor,  dst->opencl_device_vendor)  != 0) return false;
-    if (strcmp (src->opencl_device_version, dst->opencl_device_version) != 0) return false;
-    if (strcmp (src->opencl_driver_version, dst->opencl_driver_version) != 0) return false;
-  }
-
-  if (src->device_processors         != dst->device_processors)         return false;
-  if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
-  if (src->device_maxworkgroup_size  != dst->device_maxworkgroup_size)  return false;
-
-  // memory size can be different, depending on which gpu has a monitor connected
-  // if (src->device_maxmem_alloc       != dst->device_maxmem_alloc)       return false;
-  // if (src->device_global_mem         != dst->device_global_mem)         return false;
-
-  if (src->sm_major != dst->sm_major) return false;
-  if (src->sm_minor != dst->sm_minor) return false;
-
-  if (src->kernel_exec_timeout != dst->kernel_exec_timeout) return false;
-
-  return true;
-}
-
 void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -5466,6 +5363,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int));
 
+    const int device_id = device_param->device_id;
+
     /**
      * module depending checks
      */
@@ -5478,7 +5377,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if ((unstable_warning == true) && (user_options->force == false))
       {
-        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known OpenCL/Driver issue (not a hashcat issue)", backend_devices_idx + 1, hashconfig->hash_mode);
+        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known OpenCL/Driver issue (not a hashcat issue)", device_id + 1, hashconfig->hash_mode);
         event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
         device_param->skipped_warning = true;
@@ -5853,8 +5752,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     build_options_module_buf[build_options_module_len] = 0;
 
     #if defined (DEBUG)
-    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", backend_devices_idx + 1, build_options_buf);
-    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", backend_devices_idx + 1, build_options_module_buf);
+    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options '%s'", device_id + 1, build_options_buf);
+    if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_options_module '%s'", device_id + 1, build_options_module_buf);
     #endif
 
     /**
@@ -5976,7 +5875,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (cached == false)
       {
         #if defined (DEBUG)
-        if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
+        if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
         #endif
 
         const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -6029,7 +5928,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
 
             continue;
           }
@@ -6100,7 +5999,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
 
             continue;
           }
@@ -6213,7 +6112,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (cached == false)
         {
           #if defined (DEBUG)
-          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
+          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
           #endif
 
           const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -6255,7 +6154,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
 
             continue;
           }
@@ -6368,7 +6267,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (cached == false)
         {
           #if defined (DEBUG)
-          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", backend_devices_idx + 1, filename_from_filepath (cached_file));
+          if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache! Building may take a while...", device_id + 1, filename_from_filepath (cached_file));
           #endif
 
           const bool rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, kernel_lengths, kernel_sources, true);
@@ -6410,7 +6309,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->skipped_warning = true;
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", backend_devices_idx + 1, source_file);
+            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
 
             continue;
           }
@@ -7666,7 +7565,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (kernel_accel_max < kernel_accel_min)
     {
-      event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this attack.", backend_devices_idx + 1);
+      event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory for this attack.", device_id + 1);
 
       return -1;
     }
diff --git a/src/terminal.c b/src/terminal.c
index 570b4dfea..1e65b2e53 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -667,7 +667,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
     int cuda_devices_cnt    = backend_ctx->cuda_devices_cnt;
     int cuda_driver_version = backend_ctx->cuda_driver_version;
 
-    event_log_info (hashcat_ctx, "  CUDA.Version.: %d.%d", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
+    event_log_info (hashcat_ctx, "CUDA.Version.: %d.%d", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
     event_log_info (hashcat_ctx, NULL);
 
     for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++)
@@ -682,11 +682,11 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
       u32   device_maxclock_frequency = device_param->device_maxclock_frequency;
       u64   device_global_mem         = device_param->device_global_mem;
 
-      event_log_info (hashcat_ctx, "  Backend Device ID #%d", device_id + 1);
-      event_log_info (hashcat_ctx, "    Name...........: %s", device_name);
-      event_log_info (hashcat_ctx, "    Processor(s)...: %u", device_processors);
-      event_log_info (hashcat_ctx, "    Clock..........: %u", device_maxclock_frequency);
-      event_log_info (hashcat_ctx, "    Memory.........: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
+      event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1);
+      event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
+      event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
+      event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
+      event_log_info (hashcat_ctx, "  Memory.........: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
       event_log_info (hashcat_ctx, NULL);
     }
   }
@@ -761,21 +761,65 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
   if (user_options->machine_readable == true) return;
   if (user_options->status_json      == true) return;
 
-  cl_uint         opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
-  cl_platform_id *opencl_platforms             = backend_ctx->opencl_platforms;
-  char          **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
-  bool           *opencl_platforms_skipped     = backend_ctx->opencl_platforms_skipped;
-  cl_uint         opencl_devices_cnt           = backend_ctx->opencl_devices_cnt;
-
-  for (cl_uint opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
+  if (backend_ctx->cuda)
   {
-    cl_platform_id opencl_platform_id       = opencl_platforms[opencl_platforms_idx];
-    char          *opencl_platform_vendor   = opencl_platforms_vendor[opencl_platforms_idx];
-    bool           opencl_platform_skipped  = opencl_platforms_skipped[opencl_platforms_idx];
+    int cuda_devices_cnt    = backend_ctx->cuda_devices_cnt;
+    int cuda_driver_version = backend_ctx->cuda_driver_version;
 
-    if (opencl_platform_skipped == false)
+    const size_t len = event_log_info (hashcat_ctx, "CUDA API (CUDA %d.%d)", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
+
+    char line[HCBUFSIZ_TINY];
+
+    memset (line, '=', len);
+
+    line[len] = 0;
+
+    event_log_info (hashcat_ctx, "%s", line);
+
+    for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++)
     {
-      const size_t len = event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s", opencl_platforms_idx + 1, opencl_platform_vendor);
+      const int backend_devices_idx = backend_ctx->backend_device_from_cuda[cuda_devices_idx];
+
+      const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
+
+      int   device_id         = device_param->device_id;
+      char *device_name       = device_param->device_name;
+      u32   device_processors = device_param->device_processors;
+      u64   device_global_mem = device_param->device_global_mem;
+
+      if ((device_param->skipped == false) && (device_param->skipped_warning == false))
+      {
+        event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 " MB allocatable, %uMCU",
+                  device_id + 1,
+                  device_name,
+                  device_global_mem   / 1024 / 1024,
+                  device_processors);
+      }
+      else
+      {
+        event_log_info (hashcat_ctx, "* Device #%u: %s, skipped",
+                  device_id + 1,
+                  device_name);
+      }
+    }
+
+    event_log_info (hashcat_ctx, NULL);
+  }
+
+  if (backend_ctx->ocl)
+  {
+    cl_uint   opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
+    cl_uint  *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
+    char    **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
+    char    **opencl_platforms_version     = backend_ctx->opencl_platforms_version;
+
+    for (cl_uint opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
+    {
+      char     *opencl_platform_vendor       = opencl_platforms_vendor[opencl_platforms_idx];
+      char     *opencl_platform_version      = opencl_platforms_version[opencl_platforms_idx];
+      cl_uint   opencl_platform_devices_cnt  = opencl_platforms_devices_cnt[opencl_platforms_idx];
+
+      const size_t len = event_log_info (hashcat_ctx, "OpenCL API (%s) - Platform #%u [%s]", opencl_platform_version, opencl_platforms_idx + 1, opencl_platform_vendor);
 
       char line[HCBUFSIZ_TINY];
 
@@ -784,41 +828,38 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
       line[len] = 0;
 
       event_log_info (hashcat_ctx, "%s", line);
-    }
-    else
-    {
-      event_log_info (hashcat_ctx, "OpenCL Platform #%u: %s, skipped or no OpenCL compatible devices found.", opencl_platforms_idx + 1, opencl_platform_vendor);
-    }
 
-    for (cl_uint opencl_devices_idx = 0; opencl_devices_idx < opencl_devices_cnt; opencl_devices_idx++)
-    {
-      const hc_device_param_t *device_param = backend_ctx->devices_param + opencl_devices_idx;
-
-      if (device_param->opencl_platform != opencl_platform_id) continue;
-
-      char *device_name         = device_param->device_name;
-      u32   device_processors   = device_param->device_processors;
-      u64   device_maxmem_alloc = device_param->device_maxmem_alloc;
-      u64   device_global_mem   = device_param->device_global_mem;
-
-      if ((device_param->skipped == false) && (device_param->skipped_warning == false))
+      for (cl_uint opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
       {
-        event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 "/%" PRIu64 " MB allocatable, %uMCU",
-                  opencl_devices_idx + 1,
-                  device_name,
-                  device_maxmem_alloc / 1024 / 1024,
-                  device_global_mem   / 1024 / 1024,
-                  device_processors);
-      }
-      else
-      {
-        event_log_info (hashcat_ctx, "* Device #%u: %s, skipped.",
-                  opencl_devices_idx + 1,
-                  device_name);
-      }
-    }
+        const int backend_devices_idx = backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx];
 
-    event_log_info (hashcat_ctx, NULL);
+        const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
+
+        int   device_id           = device_param->device_id;
+        char *device_name         = device_param->device_name;
+        u32   device_processors   = device_param->device_processors;
+        u64   device_maxmem_alloc = device_param->device_maxmem_alloc;
+        u64   device_global_mem   = device_param->device_global_mem;
+
+        if ((device_param->skipped == false) && (device_param->skipped_warning == false))
+        {
+          event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 "/%" PRIu64 " MB allocatable, %uMCU",
+                    device_id + 1,
+                    device_name,
+                    device_maxmem_alloc / 1024 / 1024,
+                    device_global_mem   / 1024 / 1024,
+                    device_processors);
+        }
+        else
+        {
+          event_log_info (hashcat_ctx, "* Device #%u: %s, skipped",
+                    device_id + 1,
+                    device_name);
+        }
+      }
+
+      event_log_info (hashcat_ctx, NULL);
+    }
   }
 }
 
diff --git a/src/usage.c b/src/usage.c
index 51e764819..2b564568e 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -90,7 +90,6 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   "     --cpu-affinity             | Str  | Locks to CPU devices, separated with commas          | --cpu-affinity=1,2,3",
   "     --example-hashes           |      | Show an example hash for each hash-mode              |",
   " -I, --opencl-info              |      | Show info about detected OpenCL platforms/devices    | -I",
-  "     --opencl-platforms         | Str  | OpenCL platforms to use, separated with commas       | --opencl-platforms=2",
   " -d, --opencl-devices           | Str  | OpenCL devices to use, separated with commas         | -d 1",
   " -D, --opencl-device-types      | Str  | OpenCL device-types to use, separated with commas    | -D 1",
   "     --opencl-vector-width      | Num  | Manually override OpenCL vector-width to X           | --opencl-vector=4",
diff --git a/src/user_options.c b/src/user_options.c
index 6b6984ff6..2691c565a 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -78,7 +78,6 @@ static const struct option long_options[] =
   {"opencl-devices",            required_argument, NULL, IDX_OPENCL_DEVICES},
   {"opencl-device-types",       required_argument, NULL, IDX_OPENCL_DEVICE_TYPES},
   {"opencl-info",               no_argument,       NULL, IDX_OPENCL_INFO},
-  {"opencl-platforms",          required_argument, NULL, IDX_OPENCL_PLATFORMS},
   {"opencl-vector-width",       required_argument, NULL, IDX_OPENCL_VECTOR_WIDTH},
   {"optimized-kernel-enable",   no_argument,       NULL, IDX_OPTIMIZED_KERNEL_ENABLE},
   {"outfile-autohex-disable",   no_argument,       NULL, IDX_OUTFILE_AUTOHEX_DISABLE},
@@ -206,7 +205,6 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->opencl_devices            = NULL;
   user_options->opencl_device_types       = NULL;
   user_options->opencl_info               = OPENCL_INFO;
-  user_options->opencl_platforms          = NULL;
   user_options->opencl_vector_width       = OPENCL_VECTOR_WIDTH;
   user_options->optimized_kernel_enable   = OPTIMIZED_KERNEL_ENABLE;
   user_options->outfile_autohex           = OUTFILE_AUTOHEX;
@@ -427,7 +425,6 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_CPU_AFFINITY:              user_options->cpu_affinity              = optarg;                          break;
       case IDX_OPENCL_INFO:               user_options->opencl_info               = true;                            break;
       case IDX_OPENCL_DEVICES:            user_options->opencl_devices            = optarg;                          break;
-      case IDX_OPENCL_PLATFORMS:          user_options->opencl_platforms          = optarg;                          break;
       case IDX_OPENCL_DEVICE_TYPES:       user_options->opencl_device_types       = optarg;                          break;
       case IDX_OPENCL_VECTOR_WIDTH:       user_options->opencl_vector_width       = hc_strtoul (optarg, NULL, 10);
                                           user_options->opencl_vector_width_chgd  = true;                            break;
@@ -1090,16 +1087,6 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (user_options->opencl_platforms != NULL)
-  {
-    if (strlen (user_options->opencl_platforms) == 0)
-    {
-      event_log_error (hashcat_ctx, "Invalid --opencl-platforms value - must not be empty.");
-
-      return -1;
-    }
-  }
-
   if (user_options->opencl_devices != NULL)
   {
     if (strlen (user_options->opencl_devices) == 0)
@@ -1597,7 +1584,6 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
   {
     user_options->opencl_devices      = NULL;
     user_options->opencl_device_types = hcstrdup ("1,2,3");
-    user_options->opencl_platforms    = NULL;
     user_options->quiet               = true;
   }
 
@@ -1742,11 +1728,6 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "* --opencl-device-types=%s", user_options->opencl_device_types);
     }
 
-    if (user_options->opencl_platforms)
-    {
-      event_log_info (hashcat_ctx, "* --opencl-platforms=%s", user_options->opencl_platforms);
-    }
-
     if (user_options->optimized_kernel_enable == true)
     {
       event_log_info (hashcat_ctx, "* --optimized-kernel-enable");
@@ -1801,11 +1782,6 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "# option: --opencl-device-types=%s", user_options->opencl_device_types);
     }
 
-    if (user_options->opencl_platforms)
-    {
-      event_log_info (hashcat_ctx, "* option: --opencl-platforms=%s", user_options->opencl_platforms);
-    }
-
     if (user_options->optimized_kernel_enable == true)
     {
       event_log_info (hashcat_ctx, "# option: --optimized-kernel-enable");
@@ -2720,7 +2696,6 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_string (user_options->markov_hcstat2);
   logfile_top_string (user_options->opencl_devices);
   logfile_top_string (user_options->opencl_device_types);
-  logfile_top_string (user_options->opencl_platforms);
   logfile_top_string (user_options->outfile);
   logfile_top_string (user_options->outfile_check_dir);
   logfile_top_string (user_options->potfile_path);

From 495d89f831deaee49d4d48569dfef86a7b33154e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 1 May 2019 07:27:10 +0200
Subject: [PATCH 17/73] Find alias devices across different backend API's

---
 include/types.h |  4 ++++
 src/backend.c   | 23 ++++++++++++++++++-----
 src/terminal.c  | 22 +++++++++++++++++++---
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/include/types.h b/include/types.h
index 3317f37cd..8052bea6f 100644
--- a/include/types.h
+++ b/include/types.h
@@ -996,6 +996,10 @@ typedef struct hc_device_param
 {
   int     device_id;
 
+  // this occurs if the same device (pci address) is used by multiple backend API
+  int     device_id_alias_cnt;
+  int     device_id_alias_buf[DEVICES_MAX];
+
   u8      pcie_bus;
   u8      pcie_device;
   u8      pcie_function;
diff --git a/src/backend.c b/src/backend.c
index 9a24b81fe..72e084762 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -46,7 +46,7 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_
   return true;
 }
 
-static int backend_ctx_find_duplicate_devices (hashcat_ctx_t *hashcat_ctx)
+static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
@@ -68,7 +68,16 @@ static int backend_ctx_find_duplicate_devices (hashcat_ctx_t *hashcat_ctx)
 
       if (is_same_device (device_param_src, device_param_dst) == false) continue;
 
-      device_param_dst->skipped = true;
+      device_param_src->device_id_alias_buf[device_param_src->device_id_alias_cnt] = device_param_dst->device_id;
+      device_param_src->device_id_alias_cnt++;
+
+      device_param_dst->device_id_alias_buf[device_param_dst->device_id_alias_cnt] = device_param_src->device_id;
+      device_param_dst->device_id_alias_cnt++;
+
+      if (device_param_dst->is_opencl == true)
+      {
+        device_param_dst->skipped = true;
+      }
     }
   }
 
@@ -579,7 +588,7 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
   memset (nvrtc, 0, sizeof (NVRTC_PTR));
 
   #if   defined (_WIN)
-  nvrtc->lib = hc_dlopen ("nvrtc");
+  nvrtc->lib = hc_dlopen ("c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1\\nvrtc.lib");
   #elif defined (__APPLE__)
   nvrtc->lib = hc_dlopen ("/System/Library/Frameworks/NVRTC.framework/NVRTC");
   #elif defined (__CYGWIN__)
@@ -764,7 +773,7 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
   memset (cuda, 0, sizeof (CUDA_PTR));
 
   #if   defined (_WIN)
-  cuda->lib = hc_dlopen ("cuda");
+  cuda->lib = hc_dlopen ("c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1\\cuda.lib");
   #elif defined (__APPLE__)
   cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
   #elif defined (__CYGWIN__)
@@ -3812,6 +3821,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->cuda_device = cuda_device;
 
+      device_param->is_cuda = true;
+
       // device_name
 
       char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY);
@@ -4119,6 +4130,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         device_param->opencl_platform = opencl_platform;
 
+        device_param->is_opencl = true;
+
         // opencl_device_type
 
         cl_device_type opencl_device_type;
@@ -4942,7 +4955,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   if (user_options->force == false)
   {
-    backend_ctx_find_duplicate_devices (hashcat_ctx);
+    backend_ctx_find_alias_devices (hashcat_ctx);
   }
 
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
diff --git a/src/terminal.c b/src/terminal.c
index 1e65b2e53..9d9d2ddcc 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -682,7 +682,15 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
       u32   device_maxclock_frequency = device_param->device_maxclock_frequency;
       u64   device_global_mem         = device_param->device_global_mem;
 
-      event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1);
+      if (device_param->device_id_alias_cnt)
+      {
+        event_log_info (hashcat_ctx, "Backend Device ID #%d (alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+      }
+      else
+      {
+        event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1);
+      }
+
       event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
       event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
       event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
@@ -735,7 +743,15 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
         char          *opencl_device_version      = device_param->opencl_device_version;
         char          *opencl_driver_version      = device_param->opencl_driver_version;
 
-        event_log_info (hashcat_ctx, "  Backend Device ID #%d", device_id + 1);
+        if (device_param->device_id_alias_cnt)
+        {
+          event_log_info (hashcat_ctx, "  Backend Device ID #%d (alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        }
+        else
+        {
+          event_log_info (hashcat_ctx, "  Backend Device ID #%d", device_id + 1);
+        }
+
         event_log_info (hashcat_ctx, "    Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
         event_log_info (hashcat_ctx, "    Vendor.ID......: %u", opencl_device_vendor_id);
         event_log_info (hashcat_ctx, "    Vendor.........: %s", opencl_device_vendor);
@@ -789,7 +805,7 @@ void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
 
       if ((device_param->skipped == false) && (device_param->skipped_warning == false))
       {
-        event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 " MB allocatable, %uMCU",
+        event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 " MB, %uMCU",
                   device_id + 1,
                   device_name,
                   device_global_mem   / 1024 / 1024,

From 3c4f4df771083b24c26acb655b742e7b24869db0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 1 May 2019 15:52:56 +0200
Subject: [PATCH 18/73] Rename some more variables

---
 extra/tab_completion/hashcat.sh |   22 +-
 include/terminal.h              |    4 +-
 include/types.h                 |   42 +-
 src/backend.c                   |  335 ++++----
 src/bitmap.c                    |    2 +-
 src/combinator.c                |    2 +-
 src/cpt.c                       |    2 +-
 src/debugfile.c                 |    2 +-
 src/dictstat.c                  |    2 +-
 src/hashcat.c                   |   14 +-
 src/hashes.c                    |    2 +-
 src/hwmon.c                     | 1286 +++++++++++++++++++------------
 src/induct.c                    |    2 +-
 src/loopback.c                  |    2 +-
 src/main.c                      |   38 +-
 src/mpsp.c                      |    2 +-
 src/outfile_check.c             |    2 +-
 src/potfile.c                   |    2 +-
 src/restore.c                   |    2 +-
 src/straight.c                  |    2 +-
 src/terminal.c                  |    8 +-
 src/tuningdb.c                  |    2 +-
 src/usage.c                     |    6 +-
 src/user_options.c              |   88 +--
 src/wordlist.c                  |    2 +-
 25 files changed, 1105 insertions(+), 768 deletions(-)

diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh
index 8243e7f77..e44b6e0ac 100644
--- a/extra/tab_completion/hashcat.sh
+++ b/extra/tab_completion/hashcat.sh
@@ -126,7 +126,7 @@ _hashcat_get_permutations ()
   fi
 }
 
-_hashcat_opencl_devices ()
+_hashcat_backend_devices ()
 {
   local num_devices=0
 
@@ -180,7 +180,7 @@ _hashcat ()
   local HCCAPX_MESSAGE_PAIRS="0 1 2 3 4 5"
   local OUTFILE_FORMATS="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
   local OPENCL_DEVICE_TYPES="1 2 3"
-  local OPENCL_VECTOR_WIDTH="1 2 4 8 16"
+  local BACKEND_VECTOR_WIDTH="1 2 4 8 16"
   local DEBUG_MODE="1 2 3 4"
   local WORKLOAD_PROFILE="1 2 3 4"
   local BRAIN_CLIENT_FEATURES="1 2 3"
@@ -189,8 +189,8 @@ _hashcat ()
   local BUILD_IN_CHARSETS='?l ?u ?d ?a ?b ?s ?h ?H'
 
   local SHORT_OPTS="-m -a -V -v -h -b -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z"
-  local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --opencl-info --opencl-devices --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable  --slow-candidates --brain-server --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password"
-  local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --markov-hcstat2 --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --opencl-devices --opencl-device-types --opencl-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-whitelist-session --stdin-timeout-abort"
+  local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --left --username --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --backend-info --backend-devices --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --self-test-disable  --slow-candidates --brain-server --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password"
+  local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --markov-hcstat2 --markov-threshold --runtime --session --timer --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --backend-devices --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-whitelist-session --stdin-timeout-abort"
 
   COMPREPLY=()
   local cur="${COMP_WORDS[COMP_CWORD]}"
@@ -250,8 +250,8 @@ _hashcat ()
       return 0
       ;;
 
-     -d|--opencl-devices)
-      _hashcat_opencl_devices
+     -d|--backend-devices)
+      _hashcat_backend_devices
       local num_devices=${?}
 
       _hashcat_get_permutations ${num_devices}
@@ -265,8 +265,8 @@ _hashcat ()
       return 0
       ;;
 
-    --opencl-vector-width)
-      COMPREPLY=($(compgen -W "${OPENCL_VECTOR_WIDTH}" -- ${cur}))
+    --backend-vector-width)
+      COMPREPLY=($(compgen -W "${BACKEND_VECTOR_WIDTH}" -- ${cur}))
       return 0
       ;;
 
@@ -443,13 +443,13 @@ _hashcat ()
       ;;
 
     -d*)
-      _hashcat_opencl_devices
+      _hashcat_backend_devices
       local num_devices=${?}
 
       _hashcat_get_permutations ${num_devices}
 
-      local opencl_devices_var="$(echo "  "${hashcat_devices_permutation} | sed 's/ / -d/g')"
-      COMPREPLY=($(compgen -W "${opencl_devices_var}" -- ${cur}))
+      local backend_devices_var="$(echo "  "${hashcat_devices_permutation} | sed 's/ / -d/g')"
+      COMPREPLY=($(compgen -W "${backend_devices_var}" -- ${cur}))
       return 0
       ;;
   esac
diff --git a/include/terminal.h b/include/terminal.h
index 1687b9a9e..e15d4ef9c 100644
--- a/include/terminal.h
+++ b/include/terminal.h
@@ -43,8 +43,8 @@ void compress_terminal_line_length (char *out_buf, const size_t keep_from_beginn
 
 void example_hashes                     (hashcat_ctx_t *hashcat_ctx);
 
-void opencl_info                        (hashcat_ctx_t *hashcat_ctx);
-void opencl_info_compact                (hashcat_ctx_t *hashcat_ctx);
+void backend_info                       (hashcat_ctx_t *hashcat_ctx);
+void backend_info_compact               (hashcat_ctx_t *hashcat_ctx);
 
 void status_progress_machine_readable   (hashcat_ctx_t *hashcat_ctx);
 void status_progress                    (hashcat_ctx_t *hashcat_ctx);
diff --git a/include/types.h b/include/types.h
index 8052bea6f..9bd78be0c 100644
--- a/include/types.h
+++ b/include/types.h
@@ -44,13 +44,6 @@ typedef int16_t i16;
 typedef int32_t i32;
 typedef int64_t i64;
 
-// import types from opencl
-
-//typedef uint8_t  uchar;
-//typedef uint16_t ushort;
-//typedef uint32_t uint;
-//typedef uint64_t ulong;
-
 #include "inc_types.h"
 
 // there's no such thing in plain C, therefore all vector operation cannot work in this emu
@@ -133,10 +126,10 @@ typedef enum event_identifier
   EVENT_MONITOR_PERFORMANCE_HINT  = 0x00000086,
   EVENT_MONITOR_NOINPUT_HINT      = 0x00000087,
   EVENT_MONITOR_NOINPUT_ABORT     = 0x00000088,
-  EVENT_OPENCL_SESSION_POST       = 0x00000090,
-  EVENT_OPENCL_SESSION_PRE        = 0x00000091,
-  EVENT_OPENCL_DEVICE_INIT_POST   = 0x00000092,
-  EVENT_OPENCL_DEVICE_INIT_PRE    = 0x00000093,
+  EVENT_BACKEND_SESSION_POST      = 0x00000090,
+  EVENT_BACKEND_SESSION_PRE       = 0x00000091,
+  EVENT_BACKEND_DEVICE_INIT_POST  = 0x00000092,
+  EVENT_BACKEND_DEVICE_INIT_PRE   = 0x00000093,
   EVENT_OUTERLOOP_FINISHED        = 0x000000a0,
   EVENT_OUTERLOOP_MAINSCREEN      = 0x000000a1,
   EVENT_OUTERLOOP_STARTING        = 0x000000a2,
@@ -592,8 +585,8 @@ typedef enum user_options_defaults
   MARKOV_DISABLE           = false,
   MARKOV_THRESHOLD         = 0,
   NONCE_ERROR_CORRECTIONS  = 8,
-  OPENCL_INFO              = false,
-  OPENCL_VECTOR_WIDTH      = 0,
+  BACKEND_INFO             = false,
+  BACKEND_VECTOR_WIDTH     = 0,
   OPTIMIZED_KERNEL_ENABLE  = false,
   OUTFILE_AUTOHEX          = true,
   OUTFILE_CHECK_TIMER      = 5,
@@ -637,6 +630,9 @@ typedef enum user_options_map
 {
   IDX_ADVICE_DISABLE            = 0xff00,
   IDX_ATTACK_MODE               = 'a',
+  IDX_BACKEND_DEVICES           = 'd',
+  IDX_BACKEND_INFO              = 'I',
+  IDX_BACKEND_VECTOR_WIDTH      = 0xff27,
   IDX_BENCHMARK_ALL             = 0xff01,
   IDX_BENCHMARK                 = 'b',
   IDX_BITMAP_MAX                = 0xff02,
@@ -690,10 +686,7 @@ typedef enum user_options_map
   IDX_MARKOV_HCSTAT2            = 0xff24,
   IDX_MARKOV_THRESHOLD          = 't',
   IDX_NONCE_ERROR_CORRECTIONS   = 0xff25,
-  IDX_OPENCL_DEVICES            = 'd',
   IDX_OPENCL_DEVICE_TYPES       = 'D',
-  IDX_OPENCL_INFO               = 'I',
-  IDX_OPENCL_VECTOR_WIDTH       = 0xff27,
   IDX_OPTIMIZED_KERNEL_ENABLE   = 'O',
   IDX_OUTFILE_AUTOHEX_DISABLE   = 0xff28,
   IDX_OUTFILE_CHECK_DIR         = 0xff29,
@@ -1004,8 +997,6 @@ typedef struct hc_device_param
   u8      pcie_device;
   u8      pcie_function;
 
-  u32     opencl_platform_devices_id;  // for mapping with hms devices
-
   bool    skipped;              // permanent
   bool    skipped_warning;      // iteration
 
@@ -1267,7 +1258,7 @@ typedef struct hc_device_param
   char   *opencl_device_version;
   char   *opencl_device_c_version;
 
-  cl_platform_id  opencl_platform;
+  //cl_platform_id  opencl_platform;
   cl_device_type  opencl_device_type;
   cl_uint         opencl_device_vendor_id;
   cl_uint         opencl_platform_vendor_id;
@@ -1361,6 +1352,8 @@ typedef struct backend_ctx
   int                 opencl_devices_cnt;
   int                 opencl_devices_active;
 
+  u64                 backend_devices_filter;
+
   hc_device_param_t  *devices_param;
 
   u32                 hardware_power_all;
@@ -1368,8 +1361,6 @@ typedef struct backend_ctx
   u64                 kernel_power_all;
   u64                 kernel_power_final; // we save that so that all divisions are done from the same base
 
-  u64                 devices_filter;
-
   double              target_msec;
 
   bool                need_adl;
@@ -1393,6 +1384,7 @@ typedef struct backend_ctx
   cl_uint            *opencl_platforms_devices_cnt;
   char              **opencl_platforms_name;
   char              **opencl_platforms_vendor;
+  cl_uint            *opencl_platforms_vendor_id;
   char              **opencl_platforms_version;
 
   cl_device_type      opencl_device_types_filter;
@@ -1718,7 +1710,7 @@ typedef struct user_options
   bool         kernel_threads_chgd;
   bool         nonce_error_corrections_chgd;
   bool         spin_damp_chgd;
-  bool         opencl_vector_width_chgd;
+  bool         backend_vector_width_chgd;
   bool         outfile_format_chgd;
   bool         remove_timer_chgd;
   bool         rp_gen_seed_chgd;
@@ -1750,7 +1742,7 @@ typedef struct user_options
   bool         machine_readable;
   bool         markov_classic;
   bool         markov_disable;
-  bool         opencl_info;
+  bool         backend_info;
   bool         optimized_kernel_enable;
   bool         outfile_autohex;
   bool         potfile_disable;
@@ -1782,7 +1774,7 @@ typedef struct user_options
   char        *induction_dir;
   char        *keyboard_layout_mapping;
   char        *markov_hcstat2;
-  char        *opencl_devices;
+  char        *backend_devices;
   char        *opencl_device_types;
   char        *outfile;
   char        *outfile_check_dir;
@@ -1821,7 +1813,7 @@ typedef struct user_options
   u32          markov_threshold;
   u32          nonce_error_corrections;
   u32          spin_damp;
-  u32          opencl_vector_width;
+  u32          backend_vector_width;
   u32          outfile_check_timer;
   u32          outfile_format;
   u32          remove_timer;
diff --git a/src/backend.c b/src/backend.c
index 72e084762..b5d743f8b 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -182,13 +182,13 @@ static int ocl_check_dri (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
-static bool setup_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *opencl_devices, u64 *out)
+static bool setup_backend_devices_filter (hashcat_ctx_t *hashcat_ctx, const char *backend_devices, u64 *out)
 {
   u64 backend_devices_filter = 0;
 
-  if (opencl_devices)
+  if (backend_devices)
   {
-    char *devices = hcstrdup (opencl_devices);
+    char *devices = hcstrdup (backend_devices);
 
     if (devices == NULL) return false;
 
@@ -378,8 +378,8 @@ static bool read_kernel_binary (hashcat_ctx_t *hashcat_ctx, const char *kernel_f
 
     if (force_recompile == true)
     {
-      // this adds some hopefully unique data to the opencl kernel source
-      // the effect should be that opencl kernel compiler caching see this as new "uncached" source
+      // this adds some hopefully unique data to the backend kernel source
+      // the effect should be that backend kernel compiler caching sees this as new "uncached" source
       // we have to do this since they do not check for the changes only in the #include source
 
       time_t tlog = time (NULL);
@@ -3356,7 +3356,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
           }
           else
           {
-            // it's unclear if 4s is enough to turn on boost mode for all opencl device
+            // it's unclear if 4s is enough to turn on boost mode for all backend devices
 
             if ((total_msec > 4000) || (device_param->speed_pos == SPEED_CACHE - 1))
             {
@@ -3546,19 +3546,19 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (rc_ocl_check == -1) return -1;
 
   /**
-   * OpenCL device selection (tbd rename)
+   * Backend device selection
    */
 
-  u64 devices_filter;
+  u64 backend_devices_filter;
 
-  const bool rc_devices_filter = setup_devices_filter (hashcat_ctx, user_options->opencl_devices, &devices_filter);
+  const bool rc_backend_devices_filter = setup_backend_devices_filter (hashcat_ctx, user_options->backend_devices, &backend_devices_filter);
 
-  if (rc_devices_filter == false) return -1;
+  if (rc_backend_devices_filter == false) return -1;
 
-  backend_ctx->devices_filter = devices_filter;
+  backend_ctx->backend_devices_filter = backend_devices_filter;
 
   /**
-   * OpenCL device type selection (tbd rename)
+   * OpenCL device type selection
    */
 
   cl_device_type opencl_device_types_filter;
@@ -3596,6 +3596,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
       hcfree (opencl_platforms_devices_cnt);  \
       hcfree (opencl_platforms_name);         \
       hcfree (opencl_platforms_vendor);       \
+      hcfree (opencl_platforms_vendor_id);    \
       hcfree (opencl_platforms_version);      \
     }
 
@@ -3605,6 +3606,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     cl_uint        *opencl_platforms_devices_cnt = (cl_uint *)        hccalloc (CL_PLATFORMS_MAX, sizeof (cl_uint));
     char          **opencl_platforms_name        = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
     char          **opencl_platforms_vendor      = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
+    cl_uint        *opencl_platforms_vendor_id   = (cl_uint *)        hccalloc (CL_PLATFORMS_MAX, sizeof (cl_uint));
     char          **opencl_platforms_version     = (char **)          hccalloc (CL_PLATFORMS_MAX, sizeof (char *));
 
     int CL_rc = hc_clGetPlatformIDs (hashcat_ctx, CL_PLATFORMS_MAX, opencl_platforms, &opencl_platforms_cnt);
@@ -3620,6 +3622,123 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if (opencl_platforms_cnt)
     {
+      for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
+      {
+        cl_platform_id opencl_platform = opencl_platforms[opencl_platforms_idx];
+
+        size_t param_value_size = 0;
+
+        // platform vendor
+
+        int CL_rc;
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *opencl_platform_vendor = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, param_value_size, opencl_platform_vendor, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        opencl_platforms_vendor[opencl_platforms_idx] = opencl_platform_vendor;
+
+        // platform name
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *opencl_platform_name = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, param_value_size, opencl_platform_name, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        opencl_platforms_name[opencl_platforms_idx] = opencl_platform_name;
+
+        // platform version
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
+
+        if (CL_rc == -1) return -1;
+
+        char *opencl_platform_version = (char *) hcmalloc (param_value_size);
+
+        CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, param_value_size, opencl_platform_version, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        opencl_platforms_version[opencl_platforms_idx] = opencl_platform_version;
+
+        // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
+        // this causes trouble with vendor id based macros
+        // we'll assign generic to those without special optimization available
+
+        cl_uint opencl_platform_vendor_id = 0;
+
+        if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD1) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD2) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_AMD;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_APPLE) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_APPLE;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_INTEL_SDK;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_MESA) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_MESA;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_NV) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_NV;
+        }
+        else if (strcmp (opencl_platform_vendor, CL_VENDOR_POCL) == 0)
+        {
+          opencl_platform_vendor_id = VENDOR_ID_POCL;
+        }
+        else
+        {
+          opencl_platform_vendor_id = VENDOR_ID_GENERIC;
+        }
+
+        opencl_platforms_vendor_id[opencl_platforms_idx] = opencl_platform_vendor_id;
+
+        cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
+
+        cl_uint opencl_platform_devices_cnt = 0;
+
+        CL_rc = hc_clGetDeviceIDs (hashcat_ctx, opencl_platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, opencl_platform_devices, &opencl_platform_devices_cnt);
+
+        if (CL_rc == -1)
+        {
+          event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
+
+          return -1;
+        }
+
+        opencl_platforms_devices[opencl_platforms_idx] = opencl_platform_devices;
+
+        opencl_platforms_devices_cnt[opencl_platforms_idx] = opencl_platform_devices_cnt;
+      }
+
       if (user_options->opencl_device_types == NULL)
       {
         /**
@@ -3631,22 +3750,8 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
         {
-          cl_platform_id opencl_platform = opencl_platforms[opencl_platforms_idx];
-
-          cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
-
-          cl_uint opencl_platform_devices_cnt = 0;
-
-          CL_rc = hc_clGetDeviceIDs (hashcat_ctx, opencl_platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, opencl_platform_devices, &opencl_platform_devices_cnt);
-
-          if (CL_rc == -1)
-          {
-            hcfree (opencl_platform_devices);
-
-            FREE_OPENCL_CTX_ON_ERROR;
-
-            continue;
-          }
+          cl_device_id *opencl_platform_devices     = opencl_platforms_devices[opencl_platforms_idx];
+          cl_uint       opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx];
 
           for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
           {
@@ -3665,8 +3770,6 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
             opencl_device_types_all |= opencl_device_type;
           }
-
-          hcfree (opencl_platform_devices);
         }
 
         // In such a case, automatically enable CPU device type support, since it's disabled by default.
@@ -3697,6 +3800,7 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
     backend_ctx->opencl_platforms_devices_cnt = opencl_platforms_devices_cnt;
     backend_ctx->opencl_platforms_name        = opencl_platforms_name;
     backend_ctx->opencl_platforms_vendor      = opencl_platforms_vendor;
+    backend_ctx->opencl_platforms_vendor_id   = opencl_platforms_vendor_id;
     backend_ctx->opencl_platforms_version     = opencl_platforms_version;
 
     #undef FREE_OPENCL_CTX_ON_ERROR
@@ -3762,6 +3866,7 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
   hcfree (backend_ctx->opencl_platforms_devices_cnt);
   hcfree (backend_ctx->opencl_platforms_name);
   hcfree (backend_ctx->opencl_platforms_vendor);
+  hcfree (backend_ctx->opencl_platforms_vendor_id);
   hcfree (backend_ctx->opencl_platforms_version);
 
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
@@ -3962,7 +4067,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // skipped
 
-      if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
+      if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0)
       {
         device_param->skipped = true;
       }
@@ -3988,127 +4093,16 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      * OpenCL devices: simply push all devices from all platforms into the same device array
      */
 
-    cl_platform_id *opencl_platforms             = backend_ctx->opencl_platforms;
     cl_uint         opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
     cl_device_id  **opencl_platforms_devices     = backend_ctx->opencl_platforms_devices;
     cl_uint        *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
-    char          **opencl_platforms_name        = backend_ctx->opencl_platforms_name;
-    char          **opencl_platforms_vendor      = backend_ctx->opencl_platforms_vendor;
-    char          **opencl_platforms_version     = backend_ctx->opencl_platforms_version;
+    cl_uint        *opencl_platforms_vendor_id   = backend_ctx->opencl_platforms_vendor_id;
 
-    for (u32 opencl_platform_idx = 0; opencl_platform_idx < opencl_platforms_cnt; opencl_platform_idx++)
+    for (u32 opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
     {
-      size_t param_value_size = 0;
-
-      cl_platform_id opencl_platform = opencl_platforms[opencl_platform_idx];
-
-      // platform vendor
-
-      int CL_rc;
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *opencl_platform_vendor = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VENDOR, param_value_size, opencl_platform_vendor, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      opencl_platforms_vendor[opencl_platform_idx] = opencl_platform_vendor;
-
-      // platform name
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *opencl_platform_name = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_NAME, param_value_size, opencl_platform_name, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      opencl_platforms_name[opencl_platform_idx] = opencl_platform_name;
-
-      // platform version
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, 0, NULL, &param_value_size);
-
-      if (CL_rc == -1) return -1;
-
-      char *opencl_platform_version = (char *) hcmalloc (param_value_size);
-
-      CL_rc = hc_clGetPlatformInfo (hashcat_ctx, opencl_platform, CL_PLATFORM_VERSION, param_value_size, opencl_platform_version, NULL);
-
-      if (CL_rc == -1) return -1;
-
-      opencl_platforms_version[opencl_platform_idx] = opencl_platform_version;
-
-      // find our own platform vendor because pocl and mesa are pushing original vendor_id through opencl
-      // this causes trouble with vendor id based macros
-      // we'll assign generic to those without special optimization available
-
-      cl_uint opencl_platform_vendor_id = 0;
-
-      if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD1) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_AMD;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD2) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_AMD;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_AMD_USE_INTEL) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_AMD_USE_INTEL;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_APPLE) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_APPLE;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_BEIGNET) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_INTEL_BEIGNET;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_INTEL_SDK) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_INTEL_SDK;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_MESA) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_MESA;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_NV) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_NV;
-      }
-      else if (strcmp (opencl_platform_vendor, CL_VENDOR_POCL) == 0)
-      {
-        opencl_platform_vendor_id = VENDOR_ID_POCL;
-      }
-      else
-      {
-        opencl_platform_vendor_id = VENDOR_ID_GENERIC;
-      }
-
-      cl_device_id *opencl_platform_devices = (cl_device_id *) hccalloc (DEVICES_MAX, sizeof (cl_device_id));
-
-      cl_uint opencl_platform_devices_cnt = 0;
-
-      CL_rc = hc_clGetDeviceIDs (hashcat_ctx, opencl_platform, CL_DEVICE_TYPE_ALL, DEVICES_MAX, opencl_platform_devices, &opencl_platform_devices_cnt);
-
-      if (CL_rc == -1)
-      {
-        event_log_error (hashcat_ctx, "clGetDeviceIDs(): %s", val2cstr_cl (CL_rc));
-
-        return -1;
-      }
-
-      opencl_platforms_devices[opencl_platform_idx] = opencl_platform_devices;
-
-      opencl_platforms_devices_cnt[opencl_platform_idx] = opencl_platform_devices_cnt;
+      cl_device_id   *opencl_platform_devices     = opencl_platforms_devices[opencl_platforms_idx];
+      cl_uint         opencl_platform_devices_cnt = opencl_platforms_devices_cnt[opencl_platforms_idx];
+      cl_uint         opencl_platform_vendor_id   = opencl_platforms_vendor_id[opencl_platforms_idx];
 
       for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++)
       {
@@ -4120,18 +4114,20 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx;
 
-        backend_ctx->backend_device_from_opencl_platform[opencl_platform_idx][opencl_platform_devices_idx] = backend_devices_idx;
+        backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx;
 
         device_param->opencl_platform_vendor_id = opencl_platform_vendor_id;
 
         device_param->opencl_device = opencl_platform_devices[opencl_platform_devices_idx];
 
-        device_param->opencl_platform_devices_id = opencl_platform_devices_idx;
-
-        device_param->opencl_platform = opencl_platform;
+        //device_param->opencl_platform = opencl_platform;
 
         device_param->is_opencl = true;
 
+        size_t param_value_size = 0;
+
+        int CL_rc;
+
         // opencl_device_type
 
         cl_device_type opencl_device_type;
@@ -4493,7 +4489,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         // skipped
 
-        if ((backend_ctx->devices_filter & (1ULL << device_id)) == 0)
+        if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0)
         {
           device_param->skipped = true;
         }
@@ -4606,7 +4602,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (device_param->skipped == false)
         {
-          if ((user_options->force == false) && (user_options->opencl_info == false))
+          if ((user_options->force == false) && (user_options->backend_info == false))
           {
             if (opencl_device_type & CL_DEVICE_TYPE_CPU)
             {
@@ -4785,6 +4781,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         cl_context context;
 
+        /*
         cl_context_properties properties[3];
 
         properties[0] = CL_CONTEXT_PLATFORM;
@@ -4792,6 +4789,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         properties[2] = 0;
 
         CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
+        */
+
+        CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context);
 
         if (CL_rc == -1) return -1;
 
@@ -4960,13 +4960,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
-  if (backend_ctx->devices_filter != (u64) -1)
+  if (backend_ctx->backend_devices_filter != (u64) -1)
   {
     const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
 
-    if (backend_ctx->devices_filter > backend_devices_cnt_mask)
+    if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
     {
-      event_log_error (hashcat_ctx, "An invalid device was specified using the --opencl-devices parameter.");
+      event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
       event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
 
       return -1;
@@ -5007,10 +5007,13 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
 
     hcfree (device_param->device_name);
 
-    hcfree (device_param->opencl_driver_version);
-    hcfree (device_param->opencl_device_version);
-    hcfree (device_param->opencl_device_c_version);
-    hcfree (device_param->opencl_device_vendor);
+    if (device_param->is_opencl == true)
+    {
+      hcfree (device_param->opencl_driver_version);
+      hcfree (device_param->opencl_device_version);
+      hcfree (device_param->opencl_device_c_version);
+      hcfree (device_param->opencl_device_vendor);
+    }
   }
 
   backend_ctx->backend_devices_cnt    = 0;
@@ -5374,7 +5377,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->skipped == true) continue;
 
-    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int));
+    EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_PRE, &backend_devices_idx, sizeof (int));
 
     const int device_id = device_param->device_id;
 
@@ -5403,7 +5406,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     cl_uint vector_width;
 
-    if (user_options->opencl_vector_width_chgd == false)
+    if (user_options->backend_vector_width_chgd == false)
     {
       // tuning db
 
@@ -5440,7 +5443,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     }
     else
     {
-      vector_width = user_options->opencl_vector_width;
+      vector_width = user_options->backend_vector_width;
     }
 
     // We can't have SIMD in kernels where we have an unknown final password length
@@ -5600,6 +5603,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * create context for each device
      */
 
+    /*
     cl_context_properties properties[3];
 
     properties[0] = CL_CONTEXT_PLATFORM;
@@ -5607,6 +5611,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     properties[2] = 0;
 
     CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
+    */
+
+    CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
 
     if (CL_rc == -1) return -1;
 
@@ -7720,7 +7727,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     hardware_power_all += device_param->hardware_power;
 
-    EVENT_DATA (EVENT_OPENCL_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
+    EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
   }
 
   if (user_options->benchmark == false)
diff --git a/src/bitmap.c b/src/bitmap.c
index 66f9ebd0f..6b1da1362 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -82,7 +82,7 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/combinator.c b/src/combinator.c
index ccf7b4792..a0c3f16eb 100644
--- a/src/combinator.c
+++ b/src/combinator.c
@@ -21,7 +21,7 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->example_hashes == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/cpt.c b/src/cpt.c
index 90ce8728d..72db45415 100644
--- a/src/cpt.c
+++ b/src/cpt.c
@@ -18,7 +18,7 @@ int cpt_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/debugfile.c b/src/debugfile.c
index f6519534e..5e0a62bbe 100644
--- a/src/debugfile.c
+++ b/src/debugfile.c
@@ -90,7 +90,7 @@ int debugfile_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->stdout_flag    == true) return 0;
   if (user_options->speed_only     == true) return 0;
diff --git a/src/dictstat.c b/src/dictstat.c
index 628c15187..7ee0e4297 100644
--- a/src/dictstat.c
+++ b/src/dictstat.c
@@ -56,7 +56,7 @@ int dictstat_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/hashcat.c b/src/hashcat.c
index 11d212243..667b7eecc 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -207,7 +207,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
   EVENT (EVENT_AUTOTUNE_FINISHED);
 
   /**
-   * find same opencl devices and equal results
+   * find same backend devices and equal results
    */
 
   backend_ctx_devices_sync_tuning (hashcat_ctx);
@@ -295,7 +295,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
     // however, that can create confusion in hashcats RC, because exhausted translates to RC = 1.
     // but then having RC = 1 does not match our expection if we use for speed-only and progress-only.
     // to get hashcat to return RC = 0 we have to set it to CRACKED or BYPASS
-    // note: other options like --show, --left, --benchmark, --keyspace, --opencl-info, etc.
+    // note: other options like --show, --left, --benchmark, --keyspace, --backend-info, etc.
     // not not reach this section of the code, they've returned already with rc 0.
 
     if ((user_options->speed_only == true) || (user_options->progress_only == true))
@@ -720,13 +720,13 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
    * inform the user
    */
 
-  EVENT (EVENT_OPENCL_SESSION_PRE);
+  EVENT (EVENT_BACKEND_SESSION_PRE);
 
   const int rc_session_begin = backend_session_begin (hashcat_ctx);
 
   if (rc_session_begin == -1) return -1;
 
-  EVENT (EVENT_OPENCL_SESSION_POST);
+  EVENT (EVENT_BACKEND_SESSION_POST);
 
   /**
    * create self-test threads
@@ -879,7 +879,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx)
 
   potfile_write_close (hashcat_ctx);
 
-  // finalize opencl session
+  // finalize backend session
 
   backend_session_destroy (hashcat_ctx);
 
@@ -1169,7 +1169,7 @@ int hashcat_session_init (hashcat_ctx_t *hashcat_ctx, const char *install_folder
   if (rc_user_options_check_files == -1) return -1;
 
   /**
-   * Init OpenCL library loader
+   * Init backend library loader
    */
 
   const int rc_backend_init = backend_ctx_init (hashcat_ctx);
@@ -1177,7 +1177,7 @@ int hashcat_session_init (hashcat_ctx_t *hashcat_ctx, const char *install_folder
   if (rc_backend_init == -1) return -1;
 
   /**
-   * Init OpenCL devices
+   * Init backend devices
    */
 
   const int rc_devices_init = backend_ctx_devices_init (hashcat_ctx, comptime);
diff --git a/src/hashes.c b/src/hashes.c
index 653123d9d..5306d2341 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -840,7 +840,7 @@ int hashes_init_stage1 (hashcat_ctx_t *hashcat_ctx)
   else if (user_options->stdout_flag == true)
   {
   }
-  else if (user_options->opencl_info == true)
+  else if (user_options->backend_info == true)
   {
   }
   else
diff --git a/src/hwmon.c b/src/hwmon.c
index 7d710a44c..2bce94a3d 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -1351,36 +1351,7 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
 
   if (hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
-      {
-
-      }
-      else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
-      {
-        int CurrentValue = 0;
-        int DefaultValue = 0;
-
-        if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &CurrentValue, &DefaultValue) == -1)
-        {
-          hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
-
-          return -1;
-        }
-
-        // the return value has never been tested since hm_ADL_Overdrive6_TargetTemperatureData_Get() never worked on any system. expect problems.
-
-        return DefaultValue;
-      }
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1397,6 +1368,55 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+        {
+
+        }
+        else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+        {
+          int CurrentValue = 0;
+          int DefaultValue = 0;
+
+          if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &CurrentValue, &DefaultValue) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
+
+            return -1;
+          }
+
+          // the return value has never been tested since hm_ADL_Overdrive6_TargetTemperatureData_Get() never worked on any system. expect problems.
+
+          return DefaultValue;
+        }
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        int target = 0;
+
+        if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN, (unsigned int *) &target) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
+
+          return -1;
+        }
+
+        return target;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false;
 
   return -1;
@@ -1411,24 +1431,7 @@ int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
 
   if (hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
-      {
-
-      }
-      else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
-      {
-
-      }
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1445,6 +1448,43 @@ int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+        {
+
+        }
+        else if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+        {
+
+        }
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        int target = 0;
+
+        if (hm_NVML_nvmlDeviceGetTemperatureThreshold (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, (unsigned int *) &target) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported = false;
+
+          return -1;
+        }
+
+        return target;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].threshold_shutdown_get_supported = false;
 
   return -1;
@@ -1459,59 +1499,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
 
   if (hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
-      {
-        ADLTemperature Temperature;
-
-        Temperature.iSize = sizeof (ADLTemperature);
-
-        if (hm_ADL_Overdrive5_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &Temperature) == -1)
-        {
-          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
-
-          return -1;
-        }
-
-        return Temperature.iTemperature / 1000;
-      }
-
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
-      {
-        int Temperature = 0;
-
-        if (hm_ADL_Overdrive6_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &Temperature) == -1)
-        {
-          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
-
-          return -1;
-        }
-
-        return Temperature / 1000;
-      }
-    }
-
-    if (hwmon_ctx->hm_sysfs)
-    {
-      int temperature = 0;
-
-      if (hm_SYSFS_get_temperature_current (hashcat_ctx, backend_device_idx, &temperature) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
-
-        return -1;
-      }
-
-      return temperature;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1528,6 +1516,78 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+        {
+          ADLTemperature Temperature;
+
+          Temperature.iSize = sizeof (ADLTemperature);
+
+          if (hm_ADL_Overdrive5_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &Temperature) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
+
+            return -1;
+          }
+
+          return Temperature.iTemperature / 1000;
+        }
+
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+        {
+          int Temperature = 0;
+
+          if (hm_ADL_Overdrive6_Temperature_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &Temperature) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
+
+            return -1;
+          }
+
+          return Temperature / 1000;
+        }
+      }
+
+      if (hwmon_ctx->hm_sysfs)
+      {
+        int temperature = 0;
+
+        if (hm_SYSFS_get_temperature_current (hashcat_ctx, backend_device_idx, &temperature) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
+
+          return -1;
+        }
+
+        return temperature;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        int temperature = 0;
+
+        if (hm_NVML_nvmlDeviceGetTemperature (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_TEMPERATURE_GPU, (u32 *) &temperature) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
+
+          return -1;
+        }
+
+        return temperature;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false;
 
   return -1;
@@ -1542,49 +1602,57 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
 
   if (hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
-    if (hwmon_ctx->hm_adl)
+    return 1;
+  }
+
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
     {
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+      if (hwmon_ctx->hm_adl)
       {
-        ADLFanSpeedValue lpFanSpeedValue;
-
-        memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue));
-
-        lpFanSpeedValue.iSize      = sizeof (lpFanSpeedValue);
-        lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
-
-        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
         {
-          hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
-          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
+          ADLFanSpeedValue lpFanSpeedValue;
 
-          return -1;
+          memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue));
+
+          lpFanSpeedValue.iSize      = sizeof (lpFanSpeedValue);
+          lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
+
+          if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
+            hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
+
+            return -1;
+          }
+
+          return (lpFanSpeedValue.iFanSpeed & ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED) ? 0 : 1;
         }
 
-        return (lpFanSpeedValue.iFanSpeed & ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED) ? 0 : 1;
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+        {
+          return 1;
+        }
       }
 
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+      if (hwmon_ctx->hm_sysfs)
       {
         return 1;
       }
     }
 
-    if (hwmon_ctx->hm_sysfs)
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
     {
       return 1;
     }
   }
 
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
-  {
-    return 1;
-  }
-
   hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false;
   hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported  = false;
 
@@ -1600,65 +1668,7 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
 
   if (hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
-      {
-        ADLFanSpeedValue lpFanSpeedValue;
-
-        memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue));
-
-        lpFanSpeedValue.iSize      = sizeof (lpFanSpeedValue);
-        lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
-        lpFanSpeedValue.iFlags     = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED;
-
-        if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
-        {
-          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
-
-          return -1;
-        }
-
-        return lpFanSpeedValue.iFanSpeed;
-      }
-
-      if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
-      {
-        ADLOD6FanSpeedInfo faninfo;
-
-        memset (&faninfo, 0, sizeof (faninfo));
-
-        if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &faninfo) == -1)
-        {
-          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
-
-          return -1;
-        }
-
-        return faninfo.iFanSpeedPercent;
-      }
-    }
-
-    if (hwmon_ctx->hm_sysfs)
-    {
-      int speed = 0;
-
-      if (hm_SYSFS_get_fan_speed_current (hashcat_ctx, backend_device_idx, &speed) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
-
-        return -1;
-      }
-
-      return speed;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1675,6 +1685,84 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5)
+        {
+          ADLFanSpeedValue lpFanSpeedValue;
+
+          memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue));
+
+          lpFanSpeedValue.iSize      = sizeof (lpFanSpeedValue);
+          lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
+          lpFanSpeedValue.iFlags     = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED;
+
+          if (hm_ADL_Overdrive5_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, 0, &lpFanSpeedValue) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
+
+            return -1;
+          }
+
+          return lpFanSpeedValue.iFanSpeed;
+        }
+
+        if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6)
+        {
+          ADLOD6FanSpeedInfo faninfo;
+
+          memset (&faninfo, 0, sizeof (faninfo));
+
+          if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &faninfo) == -1)
+          {
+            hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
+
+            return -1;
+          }
+
+          return faninfo.iFanSpeedPercent;
+        }
+      }
+
+      if (hwmon_ctx->hm_sysfs)
+      {
+        int speed = 0;
+
+        if (hm_SYSFS_get_fan_speed_current (hashcat_ctx, backend_device_idx, &speed) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
+
+          return -1;
+        }
+
+        return speed;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        int speed = 0;
+
+        if (hm_NVML_nvmlDeviceGetFanSpeed (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, (u32 *) &speed) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
+
+          return -1;
+        }
+
+        return speed;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false;
 
   return -1;
@@ -1689,42 +1777,7 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
 
   if (hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      ADLPMActivity PMActivity;
-
-      PMActivity.iSize = sizeof (ADLPMActivity);
-
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
-
-        return -1;
-      }
-
-      return PMActivity.iCurrentBusLanes;
-    }
-
-    if (hwmon_ctx->hm_sysfs)
-    {
-      int lanes;
-
-      if (hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx, backend_device_idx, &lanes) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
-
-        return -1;
-      }
-
-      return lanes;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1741,6 +1794,61 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        ADLPMActivity PMActivity;
+
+        PMActivity.iSize = sizeof (ADLPMActivity);
+
+        if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
+
+          return -1;
+        }
+
+        return PMActivity.iCurrentBusLanes;
+      }
+
+      if (hwmon_ctx->hm_sysfs)
+      {
+        int lanes;
+
+        if (hm_SYSFS_get_pp_dpm_pcie (hashcat_ctx, backend_device_idx, &lanes) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
+
+          return -1;
+        }
+
+        return lanes;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        unsigned int currLinkWidth;
+
+        if (hm_NVML_nvmlDeviceGetCurrPcieLinkWidth (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &currLinkWidth) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
+
+          return -1;
+        }
+
+        return currLinkWidth;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
 
   return -1;
@@ -1755,28 +1863,7 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
 
   if (hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      ADLPMActivity PMActivity;
-
-      PMActivity.iSize = sizeof (ADLPMActivity);
-
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
-
-        return -1;
-      }
-
-      return PMActivity.iActivityPercent;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1793,6 +1880,47 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        ADLPMActivity PMActivity;
+
+        PMActivity.iSize = sizeof (ADLPMActivity);
+
+        if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
+
+          return -1;
+        }
+
+        return PMActivity.iActivityPercent;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        nvmlUtilization_t utilization;
+
+        if (hm_NVML_nvmlDeviceGetUtilizationRates (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &utilization) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
+
+          return -1;
+        }
+
+        return utilization.gpu;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false;
 
   return -1;
@@ -1807,42 +1935,7 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
 
   if (hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      ADLPMActivity PMActivity;
-
-      PMActivity.iSize = sizeof (ADLPMActivity);
-
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
-
-        return -1;
-      }
-
-      return PMActivity.iMemoryClock / 100;
-    }
-
-    if (hwmon_ctx->hm_sysfs)
-    {
-      int clockfreq;
-
-      if (hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
-
-        return -1;
-      }
-
-      return clockfreq;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1859,6 +1952,61 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        ADLPMActivity PMActivity;
+
+        PMActivity.iSize = sizeof (ADLPMActivity);
+
+        if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
+
+          return -1;
+        }
+
+        return PMActivity.iMemoryClock / 100;
+      }
+
+      if (hwmon_ctx->hm_sysfs)
+      {
+        int clockfreq;
+
+        if (hm_SYSFS_get_pp_dpm_mclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
+
+          return -1;
+        }
+
+        return clockfreq;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        unsigned int clockfreq;
+
+        if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_CLOCK_MEM, &clockfreq) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
+
+          return -1;
+        }
+
+        return clockfreq;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false;
 
   return -1;
@@ -1873,42 +2021,7 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
 
   if (hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-    if (hwmon_ctx->hm_adl)
-    {
-      ADLPMActivity PMActivity;
-
-      PMActivity.iSize = sizeof (ADLPMActivity);
-
-      if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
-
-        return -1;
-      }
-
-      return PMActivity.iEngineClock / 100;
-    }
-
-    if (hwmon_ctx->hm_sysfs)
-    {
-      int clockfreq;
-
-      if (hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
-      {
-        hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
-
-        return -1;
-      }
-
-      return clockfreq;
-    }
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1925,6 +2038,61 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+      if (hwmon_ctx->hm_adl)
+      {
+        ADLPMActivity PMActivity;
+
+        PMActivity.iSize = sizeof (ADLPMActivity);
+
+        if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
+
+          return -1;
+        }
+
+        return PMActivity.iEngineClock / 100;
+      }
+
+      if (hwmon_ctx->hm_sysfs)
+      {
+        int clockfreq;
+
+        if (hm_SYSFS_get_pp_dpm_sclk (hashcat_ctx, backend_device_idx, &clockfreq) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
+
+          return -1;
+        }
+
+        return clockfreq;
+      }
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        unsigned int clockfreq;
+
+        if (hm_NVML_nvmlDeviceGetClockInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, NVML_CLOCK_SM, &clockfreq) == -1)
+        {
+          hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
+
+          return -1;
+        }
+
+        return clockfreq;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false;
 
   return -1;
@@ -1939,13 +2107,7 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
 
   if (hwmon_ctx->hm_device[backend_device_idx].throttle_get_supported == false) return -1;
 
-  if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
-  {
-  }
-
-  if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+  if (backend_ctx->devices_param[backend_device_idx].is_cuda == true)
   {
     if (hwmon_ctx->hm_nvml)
     {
@@ -1991,6 +2153,61 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
     }
   }
 
+  if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
+  {
+    if ((backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) return -1;
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
+    {
+    }
+
+    if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      if (hwmon_ctx->hm_nvml)
+      {
+        /* This is triggered by the mask generator, too, and is therefore useless:
+        unsigned long long clocksThrottleReasons = 0;
+        unsigned long long supportedThrottleReasons = 0;
+
+        if (hm_NVML_nvmlDeviceGetCurrentClocksThrottleReasons   (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &clocksThrottleReasons)    == -1) return -1;
+        if (hm_NVML_nvmlDeviceGetSupportedClocksThrottleReasons (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvml, &supportedThrottleReasons) == -1) return -1;
+
+        clocksThrottleReasons &=  supportedThrottleReasons;
+        clocksThrottleReasons &= ~nvmlClocksThrottleReasonGpuIdle;
+        clocksThrottleReasons &= ~nvmlClocksThrottleReasonApplicationsClocksSetting;
+        clocksThrottleReasons &= ~nvmlClocksThrottleReasonUnknown;
+
+        if (backend_ctx->kernel_power_final)
+        {
+          clocksThrottleReasons &= ~nvmlClocksThrottleReasonHwSlowdown;
+        }
+
+        return (clocksThrottleReasons != nvmlClocksThrottleReasonNone);
+        */
+      }
+
+      if (hwmon_ctx->hm_nvapi)
+      {
+        NV_GPU_PERF_POLICIES_INFO_PARAMS_V1   perfPolicies_info;
+        NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1 perfPolicies_status;
+
+        memset (&perfPolicies_info,   0, sizeof (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1));
+        memset (&perfPolicies_status, 0, sizeof (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1));
+
+        perfPolicies_info.version   = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_INFO_PARAMS_V1, 1);
+        perfPolicies_status.version = MAKE_NVAPI_VERSION (NV_GPU_PERF_POLICIES_STATUS_PARAMS_V1, 1);
+
+        hm_NvAPI_GPU_GetPerfPoliciesInfo (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvapi, &perfPolicies_info);
+
+        perfPolicies_status.info_value = perfPolicies_info.info_value;
+
+        hm_NvAPI_GPU_GetPerfPoliciesStatus (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].nvapi, &perfPolicies_status);
+
+        return perfPolicies_status.throttle & 2;
+      }
+    }
+  }
+
   hwmon_ctx->hm_device[backend_device_idx].throttle_get_supported = false;
 
   return -1;
@@ -2008,15 +2225,15 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
   return 0;
   #endif // WITH_HWMON
 
-  if (user_options->example_hashes    == true) return 0;
-  if (user_options->keyspace          == true) return 0;
-  if (user_options->left              == true) return 0;
-  if (user_options->opencl_info       == true) return 0;
-  if (user_options->show              == true) return 0;
-  if (user_options->stdout_flag       == true) return 0;
-  if (user_options->usage             == true) return 0;
-  if (user_options->version           == true) return 0;
-  if (user_options->hwmon_disable     == true) return 0;
+  if (user_options->example_hashes  == true) return 0;
+  if (user_options->keyspace        == true) return 0;
+  if (user_options->left            == true) return 0;
+  if (user_options->backend_info    == true) return 0;
+  if (user_options->show            == true) return 0;
+  if (user_options->stdout_flag     == true) return 0;
+  if (user_options->usage           == true) return 0;
+  if (user_options->version         == true) return 0;
+  if (user_options->hwmon_disable   == true) return 0;
 
   hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
 
@@ -2108,34 +2325,67 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
-
-        if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
-
-        for (int i = 0; i < tmp_in; i++)
+        if (device_param->is_cuda == true)
         {
-          nvmlPciInfo_t pci;
-
-          int rc = hm_NVML_nvmlDeviceGetPciInfo (hashcat_ctx, nvmlGPUHandle[i], &pci);
-
-          if (rc == -1) continue;
-
-          if ((device_param->pcie_bus      == pci.bus)
-           && (device_param->pcie_device   == (pci.device >> 3))
-           && (device_param->pcie_function == (pci.device & 7)))
+          for (int i = 0; i < tmp_in; i++)
           {
-            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
+            nvmlPciInfo_t pci;
 
-            hm_adapters_nvml[platform_devices_id].nvml = nvmlGPUHandle[i];
+            int rc = hm_NVML_nvmlDeviceGetPciInfo (hashcat_ctx, nvmlGPUHandle[i], &pci);
 
-            hm_adapters_nvml[platform_devices_id].buslanes_get_supported            = true;
-            hm_adapters_nvml[platform_devices_id].corespeed_get_supported           = true;
-            hm_adapters_nvml[platform_devices_id].fanspeed_get_supported            = true;
-            hm_adapters_nvml[platform_devices_id].memoryspeed_get_supported         = true;
-            hm_adapters_nvml[platform_devices_id].temperature_get_supported         = true;
-            hm_adapters_nvml[platform_devices_id].threshold_shutdown_get_supported  = true;
-            hm_adapters_nvml[platform_devices_id].threshold_slowdown_get_supported  = true;
-            hm_adapters_nvml[platform_devices_id].utilization_get_supported         = true;
+            if (rc == -1) continue;
+
+            if ((device_param->pcie_bus      == pci.bus)
+             && (device_param->pcie_device   == (pci.device >> 3))
+             && (device_param->pcie_function == (pci.device & 7)))
+            {
+              const u32 device_id = device_param->device_id;
+
+              hm_adapters_nvml[device_id].nvml = nvmlGPUHandle[i];
+
+              hm_adapters_nvml[device_id].buslanes_get_supported            = true;
+              hm_adapters_nvml[device_id].corespeed_get_supported           = true;
+              hm_adapters_nvml[device_id].fanspeed_get_supported            = true;
+              hm_adapters_nvml[device_id].memoryspeed_get_supported         = true;
+              hm_adapters_nvml[device_id].temperature_get_supported         = true;
+              hm_adapters_nvml[device_id].threshold_shutdown_get_supported  = true;
+              hm_adapters_nvml[device_id].threshold_slowdown_get_supported  = true;
+              hm_adapters_nvml[device_id].utilization_get_supported         = true;
+            }
+          }
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+
+          if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
+
+          for (int i = 0; i < tmp_in; i++)
+          {
+            nvmlPciInfo_t pci;
+
+            int rc = hm_NVML_nvmlDeviceGetPciInfo (hashcat_ctx, nvmlGPUHandle[i], &pci);
+
+            if (rc == -1) continue;
+
+            if ((device_param->pcie_bus      == pci.bus)
+             && (device_param->pcie_device   == (pci.device >> 3))
+             && (device_param->pcie_function == (pci.device & 7)))
+            {
+              const u32 device_id = device_param->device_id;
+
+              hm_adapters_nvml[device_id].nvml = nvmlGPUHandle[i];
+
+              hm_adapters_nvml[device_id].buslanes_get_supported            = true;
+              hm_adapters_nvml[device_id].corespeed_get_supported           = true;
+              hm_adapters_nvml[device_id].fanspeed_get_supported            = true;
+              hm_adapters_nvml[device_id].memoryspeed_get_supported         = true;
+              hm_adapters_nvml[device_id].temperature_get_supported         = true;
+              hm_adapters_nvml[device_id].threshold_shutdown_get_supported  = true;
+              hm_adapters_nvml[device_id].threshold_slowdown_get_supported  = true;
+              hm_adapters_nvml[device_id].utilization_get_supported         = true;
+            }
           }
         }
       }
@@ -2158,33 +2408,65 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
-
-        if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
-
-        for (int i = 0; i < tmp_in; i++)
+        if (device_param->is_cuda == true)
         {
-          NvU32 BusId     = 0;
-          NvU32 BusSlotId = 0;
-
-          int rc1 = hm_NvAPI_GPU_GetBusId (hashcat_ctx, nvGPUHandle[i], &BusId);
-
-          if (rc1 == -1) continue;
-
-          int rc2 = hm_NvAPI_GPU_GetBusSlotId (hashcat_ctx, nvGPUHandle[i], &BusSlotId);
-
-          if (rc2 == -1) continue;
-
-          if ((device_param->pcie_bus      == BusId)
-           && (device_param->pcie_device   == (BusSlotId >> 3))
-           && (device_param->pcie_function == (BusSlotId & 7)))
+          for (int i = 0; i < tmp_in; i++)
           {
-            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
+            NvU32 BusId     = 0;
+            NvU32 BusSlotId = 0;
 
-            hm_adapters_nvapi[platform_devices_id].nvapi = nvGPUHandle[i];
+            int rc1 = hm_NvAPI_GPU_GetBusId (hashcat_ctx, nvGPUHandle[i], &BusId);
 
-            hm_adapters_nvapi[platform_devices_id].fanpolicy_get_supported  = true;
-            hm_adapters_nvapi[platform_devices_id].throttle_get_supported   = true;
+            if (rc1 == -1) continue;
+
+            int rc2 = hm_NvAPI_GPU_GetBusSlotId (hashcat_ctx, nvGPUHandle[i], &BusSlotId);
+
+            if (rc2 == -1) continue;
+
+            if ((device_param->pcie_bus      == BusId)
+             && (device_param->pcie_device   == (BusSlotId >> 3))
+             && (device_param->pcie_function == (BusSlotId & 7)))
+            {
+              const u32 device_id = device_param->device_id;
+
+              hm_adapters_nvapi[device_id].nvapi = nvGPUHandle[i];
+
+              hm_adapters_nvapi[device_id].fanpolicy_get_supported  = true;
+              hm_adapters_nvapi[device_id].throttle_get_supported   = true;
+            }
+          }
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+
+          if (device_param->opencl_device_vendor_id != VENDOR_ID_NV) continue;
+
+          for (int i = 0; i < tmp_in; i++)
+          {
+            NvU32 BusId     = 0;
+            NvU32 BusSlotId = 0;
+
+            int rc1 = hm_NvAPI_GPU_GetBusId (hashcat_ctx, nvGPUHandle[i], &BusId);
+
+            if (rc1 == -1) continue;
+
+            int rc2 = hm_NvAPI_GPU_GetBusSlotId (hashcat_ctx, nvGPUHandle[i], &BusSlotId);
+
+            if (rc2 == -1) continue;
+
+            if ((device_param->pcie_bus      == BusId)
+             && (device_param->pcie_device   == (BusSlotId >> 3))
+             && (device_param->pcie_function == (BusSlotId & 7)))
+            {
+              const u32 device_id = device_param->device_id;
+
+              hm_adapters_nvapi[device_id].nvapi = nvGPUHandle[i];
+
+              hm_adapters_nvapi[device_id].fanpolicy_get_supported  = true;
+              hm_adapters_nvapi[device_id].throttle_get_supported   = true;
+            }
           }
         }
       }
@@ -2227,36 +2509,44 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->skipped == true) continue;
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
-
-        if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue;
-
-        for (int i = 0; i < tmp_in; i++)
+        if (device_param->is_cuda == true)
         {
-          if ((device_param->pcie_bus      == lpAdapterInfo[i].iBusNumber)
-           && (device_param->pcie_device   == (lpAdapterInfo[i].iDeviceNumber >> 3))
-           && (device_param->pcie_function == (lpAdapterInfo[i].iDeviceNumber & 7)))
+          // nothing to do
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+
+          if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue;
+
+          for (int i = 0; i < tmp_in; i++)
           {
-            const u32 platform_devices_id = device_param->opencl_platform_devices_id;
+            if ((device_param->pcie_bus      == lpAdapterInfo[i].iBusNumber)
+             && (device_param->pcie_device   == (lpAdapterInfo[i].iDeviceNumber >> 3))
+             && (device_param->pcie_function == (lpAdapterInfo[i].iDeviceNumber & 7)))
+            {
+              const u32 device_id = device_param->device_id;
 
-            int od_supported = 0;
-            int od_enabled   = 0;
-            int od_version   = 0;
+              int od_supported = 0;
+              int od_enabled   = 0;
+              int od_version   = 0;
 
-            hm_ADL_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version);
+              hm_ADL_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version);
 
-            hm_adapters_adl[platform_devices_id].od_version = od_version;
+              hm_adapters_adl[device_id].od_version = od_version;
 
-            hm_adapters_adl[platform_devices_id].adl = lpAdapterInfo[i].iAdapterIndex;
+              hm_adapters_adl[device_id].adl = lpAdapterInfo[i].iAdapterIndex;
 
-            hm_adapters_adl[platform_devices_id].buslanes_get_supported            = true;
-            hm_adapters_adl[platform_devices_id].corespeed_get_supported           = true;
-            hm_adapters_adl[platform_devices_id].fanspeed_get_supported            = true;
-            hm_adapters_adl[platform_devices_id].fanpolicy_get_supported           = true;
-            hm_adapters_adl[platform_devices_id].memoryspeed_get_supported         = true;
-            hm_adapters_adl[platform_devices_id].temperature_get_supported         = true;
-            hm_adapters_adl[platform_devices_id].threshold_slowdown_get_supported  = true;
-            hm_adapters_adl[platform_devices_id].utilization_get_supported         = true;
+              hm_adapters_adl[device_id].buslanes_get_supported            = true;
+              hm_adapters_adl[device_id].corespeed_get_supported           = true;
+              hm_adapters_adl[device_id].fanspeed_get_supported            = true;
+              hm_adapters_adl[device_id].fanpolicy_get_supported           = true;
+              hm_adapters_adl[device_id].memoryspeed_get_supported         = true;
+              hm_adapters_adl[device_id].temperature_get_supported         = true;
+              hm_adapters_adl[device_id].threshold_slowdown_get_supported  = true;
+              hm_adapters_adl[device_id].utilization_get_supported         = true;
+            }
           }
         }
       }
@@ -2275,18 +2565,26 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
       {
         hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+        if (device_param->is_cuda == true)
+        {
+          // nothing to do
+        }
 
-        hm_adapters_sysfs[hm_adapters_id].sysfs = backend_devices_idx; // ????
+        if (device_param->is_opencl == true)
+        {
+          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
 
-        hm_adapters_sysfs[hm_adapters_id].buslanes_get_supported    = true;
-        hm_adapters_sysfs[hm_adapters_id].corespeed_get_supported   = true;
-        hm_adapters_sysfs[hm_adapters_id].fanspeed_get_supported    = true;
-        hm_adapters_sysfs[hm_adapters_id].fanpolicy_get_supported   = true;
-        hm_adapters_sysfs[hm_adapters_id].memoryspeed_get_supported = true;
-        hm_adapters_sysfs[hm_adapters_id].temperature_get_supported = true;
+          hm_adapters_sysfs[hm_adapters_id].sysfs = backend_devices_idx; // ????
 
-        hm_adapters_id++;
+          hm_adapters_sysfs[hm_adapters_id].buslanes_get_supported    = true;
+          hm_adapters_sysfs[hm_adapters_id].corespeed_get_supported   = true;
+          hm_adapters_sysfs[hm_adapters_id].fanspeed_get_supported    = true;
+          hm_adapters_sysfs[hm_adapters_id].fanpolicy_get_supported   = true;
+          hm_adapters_sysfs[hm_adapters_id].memoryspeed_get_supported = true;
+          hm_adapters_sysfs[hm_adapters_id].temperature_get_supported = true;
+
+          hm_adapters_id++;
+        }
       }
     }
   }
@@ -2320,83 +2618,123 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->skipped == true) continue;
 
-    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+    const u32 device_id = device_param->device_id;
 
-    const u32 platform_devices_id = device_param->opencl_platform_devices_id;
-
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      hwmon_ctx->hm_device[backend_devices_idx].adl         = hm_adapters_adl[platform_devices_id].adl;
-      hwmon_ctx->hm_device[backend_devices_idx].sysfs       = hm_adapters_sysfs[platform_devices_id].sysfs;
-      hwmon_ctx->hm_device[backend_devices_idx].nvapi       = 0;
-      hwmon_ctx->hm_device[backend_devices_idx].nvml        = 0;
-      hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
-
-      if (hwmon_ctx->hm_adl)
-      {
-        hwmon_ctx->hm_device[backend_devices_idx].od_version = hm_adapters_adl[platform_devices_id].od_version;
-
-        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_adl[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_adl[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_adl[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_adl[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_adl[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_adl[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_adl[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_adl[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_adl[platform_devices_id].utilization_get_supported;
-      }
-
-      if (hwmon_ctx->hm_sysfs)
-      {
-        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_sysfs[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_sysfs[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_sysfs[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_sysfs[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_sysfs[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_sysfs[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_sysfs[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_sysfs[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_sysfs[platform_devices_id].utilization_get_supported;
-      }
-    }
-
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+    if (device_param->is_cuda == true)
     {
       hwmon_ctx->hm_device[backend_devices_idx].adl         = 0;
       hwmon_ctx->hm_device[backend_devices_idx].sysfs       = 0;
-      hwmon_ctx->hm_device[backend_devices_idx].nvapi       = hm_adapters_nvapi[platform_devices_id].nvapi;
-      hwmon_ctx->hm_device[backend_devices_idx].nvml        = hm_adapters_nvml[platform_devices_id].nvml;
+      hwmon_ctx->hm_device[backend_devices_idx].nvapi       = hm_adapters_nvapi[device_id].nvapi;
+      hwmon_ctx->hm_device[backend_devices_idx].nvml        = hm_adapters_nvml[device_id].nvml;
       hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
 
       if (hwmon_ctx->hm_nvml)
       {
-        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvml[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvml[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvml[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvml[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvml[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvml[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvml[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvml[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvml[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvml[device_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvml[device_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvml[device_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvml[device_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvml[device_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvml[device_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvml[device_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvml[device_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvml[device_id].utilization_get_supported;
       }
 
       if (hwmon_ctx->hm_nvapi)
       {
-        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvapi[platform_devices_id].buslanes_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvapi[platform_devices_id].corespeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvapi[platform_devices_id].fanspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvapi[platform_devices_id].fanpolicy_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvapi[platform_devices_id].memoryspeed_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvapi[platform_devices_id].temperature_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_shutdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvapi[platform_devices_id].threshold_slowdown_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvapi[platform_devices_id].throttle_get_supported;
-        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvapi[platform_devices_id].utilization_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvapi[device_id].buslanes_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvapi[device_id].corespeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvapi[device_id].fanspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvapi[device_id].fanpolicy_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvapi[device_id].memoryspeed_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvapi[device_id].temperature_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvapi[device_id].threshold_shutdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvapi[device_id].threshold_slowdown_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvapi[device_id].throttle_get_supported;
+        hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvapi[device_id].utilization_get_supported;
+      }
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+
+      if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
+      {
+        hwmon_ctx->hm_device[backend_devices_idx].adl         = hm_adapters_adl[device_id].adl;
+        hwmon_ctx->hm_device[backend_devices_idx].sysfs       = hm_adapters_sysfs[device_id].sysfs;
+        hwmon_ctx->hm_device[backend_devices_idx].nvapi       = 0;
+        hwmon_ctx->hm_device[backend_devices_idx].nvml        = 0;
+        hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
+
+        if (hwmon_ctx->hm_adl)
+        {
+          hwmon_ctx->hm_device[backend_devices_idx].od_version = hm_adapters_adl[device_id].od_version;
+
+          hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_adl[device_id].buslanes_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_adl[device_id].corespeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_adl[device_id].fanspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_adl[device_id].fanpolicy_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_adl[device_id].memoryspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_adl[device_id].temperature_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_adl[device_id].threshold_shutdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_adl[device_id].threshold_slowdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_adl[device_id].throttle_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_adl[device_id].utilization_get_supported;
+        }
+
+        if (hwmon_ctx->hm_sysfs)
+        {
+          hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_sysfs[device_id].buslanes_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_sysfs[device_id].corespeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_sysfs[device_id].fanspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_sysfs[device_id].fanpolicy_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_sysfs[device_id].memoryspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_sysfs[device_id].temperature_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_sysfs[device_id].threshold_shutdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_sysfs[device_id].threshold_slowdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_sysfs[device_id].throttle_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_sysfs[device_id].utilization_get_supported;
+        }
+      }
+
+      if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+      {
+        hwmon_ctx->hm_device[backend_devices_idx].adl         = 0;
+        hwmon_ctx->hm_device[backend_devices_idx].sysfs       = 0;
+        hwmon_ctx->hm_device[backend_devices_idx].nvapi       = hm_adapters_nvapi[device_id].nvapi;
+        hwmon_ctx->hm_device[backend_devices_idx].nvml        = hm_adapters_nvml[device_id].nvml;
+        hwmon_ctx->hm_device[backend_devices_idx].od_version  = 0;
+
+        if (hwmon_ctx->hm_nvml)
+        {
+          hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvml[device_id].buslanes_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvml[device_id].corespeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvml[device_id].fanspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvml[device_id].fanpolicy_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvml[device_id].memoryspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvml[device_id].temperature_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvml[device_id].threshold_shutdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvml[device_id].threshold_slowdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvml[device_id].throttle_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvml[device_id].utilization_get_supported;
+        }
+
+        if (hwmon_ctx->hm_nvapi)
+        {
+          hwmon_ctx->hm_device[backend_devices_idx].buslanes_get_supported            |= hm_adapters_nvapi[device_id].buslanes_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].corespeed_get_supported           |= hm_adapters_nvapi[device_id].corespeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanspeed_get_supported            |= hm_adapters_nvapi[device_id].fanspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].fanpolicy_get_supported           |= hm_adapters_nvapi[device_id].fanpolicy_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].memoryspeed_get_supported         |= hm_adapters_nvapi[device_id].memoryspeed_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].temperature_get_supported         |= hm_adapters_nvapi[device_id].temperature_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_shutdown_get_supported  |= hm_adapters_nvapi[device_id].threshold_shutdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].threshold_slowdown_get_supported  |= hm_adapters_nvapi[device_id].threshold_slowdown_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].throttle_get_supported            |= hm_adapters_nvapi[device_id].throttle_get_supported;
+          hwmon_ctx->hm_device[backend_devices_idx].utilization_get_supported         |= hm_adapters_nvapi[device_id].utilization_get_supported;
+        }
       }
     }
 
diff --git a/src/induct.c b/src/induct.c
index 42e39e11a..46dc26eb2 100644
--- a/src/induct.c
+++ b/src/induct.c
@@ -43,7 +43,7 @@ int induct_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->stdout_flag    == true) return 0;
   if (user_options->speed_only     == true) return 0;
diff --git a/src/loopback.c b/src/loopback.c
index db4ab8c1d..057c1e456 100644
--- a/src/loopback.c
+++ b/src/loopback.c
@@ -64,7 +64,7 @@ int loopback_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->stdout_flag    == true) return 0;
   if (user_options->speed_only     == true) return 0;
diff --git a/src/main.c b/src/main.c
index 9cce0b835..48c357c1a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -190,7 +190,7 @@ static void main_outerloop_starting (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
 
   status_ctx->shutdown_outer = false;
 
-  if ((user_options->example_hashes == false) && (user_options->keyspace == false) && (user_options->stdout_flag == false) && (user_options->opencl_info == false) && (user_options->speed_only == false))
+  if ((user_options->example_hashes == false) && (user_options->keyspace == false) && (user_options->stdout_flag == false) && (user_options->backend_info == false) && (user_options->speed_only == false))
   {
     if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK))
     {
@@ -263,7 +263,7 @@ static void main_cracker_finished (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYB
 
   if (user_options->example_hashes  == true) return;
   if (user_options->keyspace        == true) return;
-  if (user_options->opencl_info     == true) return;
+  if (user_options->backend_info    == true) return;
   if (user_options->stdout_flag     == true) return;
 
   // if we had a prompt, clear it
@@ -512,9 +512,9 @@ static void main_outerloop_mainscreen (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   {
     if (hashconfig->has_optimized_kernel == true)
     {
-      event_log_advice (hashcat_ctx, "ATTENTION! Pure (unoptimized) OpenCL kernels selected.");
+      event_log_advice (hashcat_ctx, "ATTENTION! Pure (unoptimized) backend kernels selected.");
       event_log_advice (hashcat_ctx, "Using pure kernels enables cracking longer passwords but for the price of drastically reduced performance.");
-      event_log_advice (hashcat_ctx, "If you want to switch to optimized OpenCL kernels, append -O to your commandline.");
+      event_log_advice (hashcat_ctx, "If you want to switch to optimized backend kernels, append -O to your commandline.");
       event_log_advice (hashcat_ctx, "See the above message to find out about the exact limits.");
       event_log_advice (hashcat_ctx, NULL);
     }
@@ -567,7 +567,7 @@ static void main_backend_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   event_log_info_nn (hashcat_ctx, "Initialized device kernels and memory...");
 }
 
-static void main_opencl_device_init_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_device_init_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -575,10 +575,10 @@ static void main_opencl_device_init_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx
 
   const u32 *device_id = (const u32 *) buf;
 
-  event_log_info_nn (hashcat_ctx, "Initializing OpenCL runtime for device #%u...", *device_id + 1);
+  event_log_info_nn (hashcat_ctx, "Initializing backend runtime for device #%u...", *device_id + 1);
 }
 
-static void main_opencl_device_init_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+static void main_backend_device_init_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
@@ -586,7 +586,7 @@ static void main_opencl_device_init_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ct
 
   const u32 *device_id = (const u32 *) buf;
 
-  event_log_info_nn (hashcat_ctx, "Initialized OpenCL runtime for device #%u...", *device_id + 1);
+  event_log_info_nn (hashcat_ctx, "Initialized backend runtime for device #%u...", *device_id + 1);
 }
 
 static void main_bitmap_init_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
@@ -735,7 +735,7 @@ static void main_monitor_performance_hint (MAYBE_UNUSED hashcat_ctx_t *hashcat_c
     event_log_advice (hashcat_ctx, NULL);
   }
 
-  event_log_advice (hashcat_ctx, "* Update your OpenCL runtime / driver the right way:");
+  event_log_advice (hashcat_ctx, "* Update your backend API runtime / driver the right way:");
   event_log_advice (hashcat_ctx, "  https://hashcat.net/faq/wrongdriver");
   event_log_advice (hashcat_ctx, NULL);
   event_log_advice (hashcat_ctx, "* Create more work items to make use of your parallelization power:");
@@ -1022,10 +1022,10 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
     case EVENT_MONITOR_PERFORMANCE_HINT:  main_monitor_performance_hint  (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_HINT:      main_monitor_noinput_hint      (hashcat_ctx, buf, len); break;
     case EVENT_MONITOR_NOINPUT_ABORT:     main_monitor_noinput_abort     (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_POST:       main_backend_session_post      (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_SESSION_PRE:        main_backend_session_pre       (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_DEVICE_INIT_POST:   main_opencl_device_init_post   (hashcat_ctx, buf, len); break;
-    case EVENT_OPENCL_DEVICE_INIT_PRE:    main_opencl_device_init_pre    (hashcat_ctx, buf, len); break;
+    case EVENT_BACKEND_SESSION_POST:      main_backend_session_post      (hashcat_ctx, buf, len); break;
+    case EVENT_BACKEND_SESSION_PRE:       main_backend_session_pre       (hashcat_ctx, buf, len); break;
+    case EVENT_BACKEND_DEVICE_INIT_POST:  main_backend_device_init_post  (hashcat_ctx, buf, len); break;
+    case EVENT_BACKEND_DEVICE_INIT_PRE:   main_backend_device_init_pre   (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_FINISHED:        main_outerloop_finished        (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_MAINSCREEN:      main_outerloop_mainscreen      (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_STARTING:        main_outerloop_starting        (hashcat_ctx, buf, len); break;
@@ -1106,7 +1106,7 @@ int main (int argc, char **argv)
     return 0;
   }
 
-  // init a hashcat session; this initializes opencl devices, hwmon, etc
+  // init a hashcat session; this initializes backend devices, hwmon, etc
 
   welcome_screen (hashcat_ctx, VERSION_TAG);
 
@@ -1128,11 +1128,11 @@ int main (int argc, char **argv)
 
       rc_final = 0;
     }
-    else if (user_options->opencl_info == true)
+    else if (user_options->backend_info == true)
     {
-      // if this is just opencl_info, no need to execute some real cracking session
+      // if this is just backend_info, no need to execute some real cracking session
 
-      opencl_info (hashcat_ctx);
+      backend_info (hashcat_ctx);
 
       rc_final = 0;
     }
@@ -1140,7 +1140,7 @@ int main (int argc, char **argv)
     {
       // now execute hashcat
 
-      opencl_info_compact (hashcat_ctx);
+      backend_info_compact (hashcat_ctx);
 
       user_options_info (hashcat_ctx);
 
@@ -1148,7 +1148,7 @@ int main (int argc, char **argv)
     }
   }
 
-  // finish the hashcat session, this shuts down opencl devices, hwmon, etc
+  // finish the hashcat session, this shuts down backend devices, hwmon, etc
 
   hashcat_session_destroy (hashcat_ctx);
 
diff --git a/src/mpsp.c b/src/mpsp.c
index c81f6c2e5..c3a54e47d 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -1398,7 +1398,7 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->example_hashes == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/outfile_check.c b/src/outfile_check.c
index 4a63e77c8..db15049c0 100644
--- a/src/outfile_check.c
+++ b/src/outfile_check.c
@@ -363,7 +363,7 @@ int outcheck_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->speed_only     == true) return 0;
   if (user_options->progress_only  == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
 
   if (hashconfig->outfile_check_disable == true) return 0;
 
diff --git a/src/potfile.c b/src/potfile.c
index b3015e04e..9516d4ae5 100644
--- a/src/potfile.c
+++ b/src/potfile.c
@@ -103,7 +103,7 @@ int potfile_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->benchmark       == true) return 0;
   if (user_options->example_hashes  == true) return 0;
   if (user_options->keyspace        == true) return 0;
-  if (user_options->opencl_info     == true) return 0;
+  if (user_options->backend_info    == true) return 0;
   if (user_options->stdout_flag     == true) return 0;
   if (user_options->speed_only      == true) return 0;
   if (user_options->progress_only   == true) return 0;
diff --git a/src/restore.c b/src/restore.c
index 042555433..8f61cc6ae 100644
--- a/src/restore.c
+++ b/src/restore.c
@@ -304,7 +304,7 @@ int restore_ctx_init (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
   if (user_options->example_hashes  == true) return 0;
   if (user_options->keyspace        == true) return 0;
   if (user_options->left            == true) return 0;
-  if (user_options->opencl_info     == true) return 0;
+  if (user_options->backend_info    == true) return 0;
   if (user_options->show            == true) return 0;
   if (user_options->stdout_flag     == true) return 0;
   if (user_options->speed_only      == true) return 0;
diff --git a/src/straight.c b/src/straight.c
index 7b45825cd..0581838ec 100644
--- a/src/straight.c
+++ b/src/straight.c
@@ -214,7 +214,7 @@ int straight_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->example_hashes == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/terminal.c b/src/terminal.c
index 9d9d2ddcc..2ffd2df55 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -654,7 +654,7 @@ void example_hashes (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_info (hashcat_ctx_t *hashcat_ctx)
+void backend_info (hashcat_ctx_t *hashcat_ctx)
 {
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
@@ -684,7 +684,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->device_id_alias_cnt)
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%d (alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        event_log_info (hashcat_ctx, "Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
       }
       else
       {
@@ -745,7 +745,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->device_id_alias_cnt)
         {
-          event_log_info (hashcat_ctx, "  Backend Device ID #%d (alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+          event_log_info (hashcat_ctx, "  Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
         }
         else
         {
@@ -768,7 +768,7 @@ void opencl_info (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-void opencl_info_compact (hashcat_ctx_t *hashcat_ctx)
+void backend_info_compact (hashcat_ctx_t *hashcat_ctx)
 {
   const backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   const user_options_t *user_options = hashcat_ctx->user_options;
diff --git a/src/tuningdb.c b/src/tuningdb.c
index 91425f306..db88cf57a 100644
--- a/src/tuningdb.c
+++ b/src/tuningdb.c
@@ -57,7 +57,7 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->example_hashes == true) return 0;
   if (user_options->keyspace       == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->show           == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
diff --git a/src/usage.c b/src/usage.c
index 2b564568e..b6d36d379 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -89,15 +89,15 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X           | --bitmap-max=24",
   "     --cpu-affinity             | Str  | Locks to CPU devices, separated with commas          | --cpu-affinity=1,2,3",
   "     --example-hashes           |      | Show an example hash for each hash-mode              |",
-  " -I, --opencl-info              |      | Show info about detected OpenCL platforms/devices    | -I",
-  " -d, --opencl-devices           | Str  | OpenCL devices to use, separated with commas         | -d 1",
+  " -I, --backend-info             |      | Show info about detected backend API devices         | -I",
+  " -d, --backend-devices          | Str  | Backend devices to use, separated with commas        | -d 1",
   " -D, --opencl-device-types      | Str  | OpenCL device-types to use, separated with commas    | -D 1",
-  "     --opencl-vector-width      | Num  | Manually override OpenCL vector-width to X           | --opencl-vector=4",
   " -O, --optimized-kernel-enable  |      | Enable optimized kernels (limits password length)    |",
   " -w, --workload-profile         | Num  | Enable a specific workload profile, see pool below   | -w 3",
   " -n, --kernel-accel             | Num  | Manual workload tuning, set outerloop step size to X | -n 64",
   " -u, --kernel-loops             | Num  | Manual workload tuning, set innerloop step size to X | -u 256",
   " -T, --kernel-threads           | Num  | Manual workload tuning, set thread count to X        | -T 64",
+  "     --backend-vector-width     | Num  | Manually override backend vector-width to X          | --backend-vector=4",
   "     --spin-damp                | Num  | Use CPU for device synchronization, in percent       | --spin-damp=50",
   "     --hwmon-disable            |      | Disable temperature and fanspeed reads and triggers  |",
   "     --hwmon-temp-abort         | Num  | Abort if temperature reaches X degrees Celsius       | --hwmon-temp-abort=100",
diff --git a/src/user_options.c b/src/user_options.c
index 2691c565a..89a4b14b2 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -28,6 +28,9 @@ static const struct option long_options[] =
 {
   {"advice-disable",            no_argument,       NULL, IDX_ADVICE_DISABLE},
   {"attack-mode",               required_argument, NULL, IDX_ATTACK_MODE},
+  {"backend-devices",           required_argument, NULL, IDX_BACKEND_DEVICES},
+  {"backend-info",              no_argument,       NULL, IDX_BACKEND_INFO},
+  {"backend-vector-width",      required_argument, NULL, IDX_BACKEND_VECTOR_WIDTH},
   {"benchmark-all",             no_argument,       NULL, IDX_BENCHMARK_ALL},
   {"benchmark",                 no_argument,       NULL, IDX_BENCHMARK},
   {"bitmap-max",                required_argument, NULL, IDX_BITMAP_MAX},
@@ -75,10 +78,7 @@ static const struct option long_options[] =
   {"markov-hcstat2",            required_argument, NULL, IDX_MARKOV_HCSTAT2},
   {"markov-threshold",          required_argument, NULL, IDX_MARKOV_THRESHOLD},
   {"nonce-error-corrections",   required_argument, NULL, IDX_NONCE_ERROR_CORRECTIONS},
-  {"opencl-devices",            required_argument, NULL, IDX_OPENCL_DEVICES},
   {"opencl-device-types",       required_argument, NULL, IDX_OPENCL_DEVICE_TYPES},
-  {"opencl-info",               no_argument,       NULL, IDX_OPENCL_INFO},
-  {"opencl-vector-width",       required_argument, NULL, IDX_OPENCL_VECTOR_WIDTH},
   {"optimized-kernel-enable",   no_argument,       NULL, IDX_OPTIMIZED_KERNEL_ENABLE},
   {"outfile-autohex-disable",   no_argument,       NULL, IDX_OUTFILE_AUTOHEX_DISABLE},
   {"outfile-check-dir",         required_argument, NULL, IDX_OUTFILE_CHECK_DIR},
@@ -151,6 +151,9 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
 
   user_options->advice_disable            = ADVICE_DISABLE;
   user_options->attack_mode               = ATTACK_MODE;
+  user_options->backend_devices           = NULL;
+  user_options->backend_info              = BACKEND_INFO;
+  user_options->backend_vector_width      = BACKEND_VECTOR_WIDTH;
   user_options->benchmark_all             = BENCHMARK_ALL;
   user_options->benchmark                 = BENCHMARK;
   user_options->bitmap_max                = BITMAP_MAX;
@@ -202,10 +205,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->markov_hcstat2            = NULL;
   user_options->markov_threshold          = MARKOV_THRESHOLD;
   user_options->nonce_error_corrections   = NONCE_ERROR_CORRECTIONS;
-  user_options->opencl_devices            = NULL;
   user_options->opencl_device_types       = NULL;
-  user_options->opencl_info               = OPENCL_INFO;
-  user_options->opencl_vector_width       = OPENCL_VECTOR_WIDTH;
   user_options->optimized_kernel_enable   = OPTIMIZED_KERNEL_ENABLE;
   user_options->outfile_autohex           = OUTFILE_AUTOHEX;
   user_options->outfile_check_dir         = NULL;
@@ -304,7 +304,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_MARKOV_THRESHOLD:
       case IDX_OUTFILE_FORMAT:
       case IDX_OUTFILE_CHECK_TIMER:
-      case IDX_OPENCL_VECTOR_WIDTH:
+      case IDX_BACKEND_VECTOR_WIDTH:
       case IDX_WORKLOAD_PROFILE:
       case IDX_KERNEL_ACCEL:
       case IDX_KERNEL_LOOPS:
@@ -423,11 +423,11 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_HEX_SALT:                  user_options->hex_salt                  = true;                            break;
       case IDX_HEX_WORDLIST:              user_options->hex_wordlist              = true;                            break;
       case IDX_CPU_AFFINITY:              user_options->cpu_affinity              = optarg;                          break;
-      case IDX_OPENCL_INFO:               user_options->opencl_info               = true;                            break;
-      case IDX_OPENCL_DEVICES:            user_options->opencl_devices            = optarg;                          break;
+      case IDX_BACKEND_INFO:              user_options->backend_info              = true;                            break;
+      case IDX_BACKEND_DEVICES:           user_options->backend_devices           = optarg;                          break;
+      case IDX_BACKEND_VECTOR_WIDTH:      user_options->backend_vector_width      = hc_strtoul (optarg, NULL, 10);
+                                          user_options->backend_vector_width_chgd = true;                            break;
       case IDX_OPENCL_DEVICE_TYPES:       user_options->opencl_device_types       = optarg;                          break;
-      case IDX_OPENCL_VECTOR_WIDTH:       user_options->opencl_vector_width       = hc_strtoul (optarg, NULL, 10);
-                                          user_options->opencl_vector_width_chgd  = true;                            break;
       case IDX_OPTIMIZED_KERNEL_ENABLE:   user_options->optimized_kernel_enable   = true;                            break;
       case IDX_WORKLOAD_PROFILE:          user_options->workload_profile          = hc_strtoul (optarg, NULL, 10);
                                           user_options->workload_profile_chgd     = true;                            break;
@@ -856,11 +856,11 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     return -1;
   }
 
-  if (user_options->opencl_vector_width_chgd == true)
+  if (user_options->backend_vector_width_chgd == true)
   {
-    if (is_power_of_2 (user_options->opencl_vector_width) == false || user_options->opencl_vector_width > 16)
+    if (is_power_of_2 (user_options->backend_vector_width) == false || user_options->backend_vector_width > 16)
     {
-      event_log_error (hashcat_ctx, "opencl-vector-width %u is not allowed.", user_options->opencl_vector_width);
+      event_log_error (hashcat_ctx, "backend-vector-width %u is not allowed.", user_options->backend_vector_width);
 
       return -1;
     }
@@ -1087,11 +1087,11 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (user_options->opencl_devices != NULL)
+  if (user_options->backend_devices != NULL)
   {
-    if (strlen (user_options->opencl_devices) == 0)
+    if (strlen (user_options->backend_devices) == 0)
     {
-      event_log_error (hashcat_ctx, "Invalid --opencl-devices value - must not be empty.");
+      event_log_error (hashcat_ctx, "Invalid --backend-devices value - must not be empty.");
 
       return -1;
     }
@@ -1220,7 +1220,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
       show_error = false;
     }
   }
-  else if (user_options->opencl_info == true)
+  else if (user_options->backend_info == true)
   {
     if (user_options->hc_argc == 0)
     {
@@ -1420,9 +1420,9 @@ void user_options_session_auto (hashcat_ctx_t *hashcat_ctx)
       user_options->session = "stdout";
     }
 
-    if (user_options->opencl_info == true)
+    if (user_options->backend_info == true)
     {
-      user_options->session = "opencl_info";
+      user_options->session = "backend_info";
     }
 
     if (user_options->show == true)
@@ -1469,7 +1469,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
   }
 
   if (user_options->example_hashes  == true
-   || user_options->opencl_info     == true
+   || user_options->backend_info    == true
    || user_options->keyspace        == true
    || user_options->speed_only      == true
    || user_options->progress_only   == true
@@ -1546,17 +1546,17 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->slow_candidates == true)
   {
-    user_options->opencl_vector_width = 1;
+    user_options->backend_vector_width = 1;
   }
 
   if (user_options->stdout_flag == true)
   {
-    user_options->force               = true;
-    user_options->hash_mode           = 2000;
-    user_options->kernel_accel        = 1024;
-    user_options->opencl_vector_width = 1;
-    user_options->outfile_format      = OUTFILE_FMT_PLAIN;
-    user_options->quiet               = true;
+    user_options->force                 = true;
+    user_options->hash_mode             = 2000;
+    user_options->kernel_accel          = 1024;
+    user_options->backend_vector_width  = 1;
+    user_options->outfile_format        = OUTFILE_FMT_PLAIN;
+    user_options->quiet                 = true;
 
     if (user_options->attack_mode == ATTACK_MODE_STRAIGHT)
     {
@@ -1580,9 +1580,9 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (user_options->opencl_info == true)
+  if (user_options->backend_info == true)
   {
-    user_options->opencl_devices      = NULL;
+    user_options->backend_devices     = NULL;
     user_options->opencl_device_types = hcstrdup ("1,2,3");
     user_options->quiet               = true;
   }
@@ -1638,7 +1638,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     {
 
     }
-    else if (user_options->opencl_info == true)
+    else if (user_options->backend_info == true)
     {
 
     }
@@ -1718,9 +1718,9 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "* --force");
     }
 
-    if (user_options->opencl_devices)
+    if (user_options->backend_devices)
     {
-      event_log_info (hashcat_ctx, "* --opencl-devices=%s", user_options->opencl_devices);
+      event_log_info (hashcat_ctx, "* --backend-devices=%s", user_options->backend_devices);
     }
 
     if (user_options->opencl_device_types)
@@ -1733,9 +1733,9 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "* --optimized-kernel-enable");
     }
 
-    if (user_options->opencl_vector_width_chgd == true)
+    if (user_options->backend_vector_width_chgd == true)
     {
-      event_log_info (hashcat_ctx, "* --opencl-vector-width=%u", user_options->opencl_vector_width);
+      event_log_info (hashcat_ctx, "* --backend-vector-width=%u", user_options->backend_vector_width);
     }
 
     if (user_options->kernel_accel_chgd == true)
@@ -1772,9 +1772,9 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "# option: --force");
     }
 
-    if (user_options->opencl_devices)
+    if (user_options->backend_devices)
     {
-      event_log_info (hashcat_ctx, "# option: --opencl-devices=%s", user_options->opencl_devices);
+      event_log_info (hashcat_ctx, "# option: --backend-devices=%s", user_options->backend_devices);
     }
 
     if (user_options->opencl_device_types)
@@ -1787,9 +1787,9 @@ void user_options_info (hashcat_ctx_t *hashcat_ctx)
       event_log_info (hashcat_ctx, "# option: --optimized-kernel-enable");
     }
 
-    if (user_options->opencl_vector_width_chgd == true)
+    if (user_options->backend_vector_width_chgd == true)
     {
-      event_log_info (hashcat_ctx, "# option: --opencl-vector-width=%u", user_options->opencl_vector_width);
+      event_log_info (hashcat_ctx, "# option: --backend-vector-width=%u", user_options->backend_vector_width);
     }
 
     if (user_options->kernel_accel_chgd == true)
@@ -1851,7 +1851,7 @@ void user_options_extra_init (hashcat_ctx_t *hashcat_ctx)
   {
 
   }
-  else if (user_options->opencl_info == true)
+  else if (user_options->backend_info == true)
   {
 
   }
@@ -2563,7 +2563,7 @@ int user_options_check_files (hashcat_ctx_t *hashcat_ctx)
 
   hcfree (modulefile);
 
-  // same check but for an OpenCL kernel
+  // same check but for a backend kernel
 
   char *kernelfile = (char *) hcmalloc (HCBUFSIZ_TINY);
 
@@ -2694,7 +2694,7 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_string (user_options->induction_dir);
   logfile_top_string (user_options->keyboard_layout_mapping);
   logfile_top_string (user_options->markov_hcstat2);
-  logfile_top_string (user_options->opencl_devices);
+  logfile_top_string (user_options->backend_devices);
   logfile_top_string (user_options->opencl_device_types);
   logfile_top_string (user_options->outfile);
   logfile_top_string (user_options->outfile_check_dir);
@@ -2740,8 +2740,8 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_uint   (user_options->markov_classic);
   logfile_top_uint   (user_options->markov_disable);
   logfile_top_uint   (user_options->markov_threshold);
-  logfile_top_uint   (user_options->opencl_info);
-  logfile_top_uint   (user_options->opencl_vector_width);
+  logfile_top_uint   (user_options->backend_info);
+  logfile_top_uint   (user_options->backend_vector_width);
   logfile_top_uint   (user_options->optimized_kernel_enable);
   logfile_top_uint   (user_options->outfile_autohex);
   logfile_top_uint   (user_options->outfile_check_timer);
diff --git a/src/wordlist.c b/src/wordlist.c
index bafc9a2dd..34b595729 100644
--- a/src/wordlist.c
+++ b/src/wordlist.c
@@ -560,7 +560,7 @@ int wl_data_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->benchmark      == true) return 0;
   if (user_options->example_hashes == true) return 0;
   if (user_options->left           == true) return 0;
-  if (user_options->opencl_info    == true) return 0;
+  if (user_options->backend_info   == true) return 0;
   if (user_options->usage          == true) return 0;
   if (user_options->version        == true) return 0;
 

From 456c57a6d0e4e87bf4828571536a4de064ac62f9 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 1 May 2019 18:20:19 +0200
Subject: [PATCH 19/73] Set vector width size for CUDA

---
 include/types.h |  2 +-
 src/backend.c   | 90 ++++++++++++++++++++++++++++++++++---------------
 2 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/include/types.h b/include/types.h
index 9bd78be0c..42a7a1c12 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1016,7 +1016,7 @@ typedef struct hc_device_param
 
   st_status_t st_status;
 
-  u32     vector_width;
+  int     vector_width;
 
   u32     kernel_wgs1;
   u32     kernel_wgs12;
diff --git a/src/backend.c b/src/backend.c
index b5d743f8b..47f40e9b0 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5404,7 +5404,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // vector_width
 
-    cl_uint vector_width;
+    int vector_width;
 
     if (user_options->backend_vector_width_chgd == false)
     {
@@ -5425,15 +5425,35 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (hashconfig->opti_type & OPTI_TYPE_USES_BITS_64)
         {
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+          if (device_param->is_cuda == true)
+          {
+            // cuda does not support this query
 
-          if (CL_rc == -1) return -1;
+            vector_width = 1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
         else
         {
-          CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
+          if (device_param->is_cuda == true)
+          {
+            // cuda does not support this query
 
-          if (CL_rc == -1) return -1;
+            vector_width = 1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
       }
       else
@@ -5603,30 +5623,38 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * create context for each device
      */
 
-    /*
-    cl_context_properties properties[3];
+    if (device_param->is_cuda == true)
+    {
 
-    properties[0] = CL_CONTEXT_PLATFORM;
-    properties[1] = (cl_context_properties) device_param->opencl_platform;
-    properties[2] = 0;
+    }
 
-    CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
-    */
+    if (device_param->is_opencl == true)
+    {
+      /*
+      cl_context_properties properties[3];
 
-    CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
+      properties[0] = CL_CONTEXT_PLATFORM;
+      properties[1] = (cl_context_properties) device_param->opencl_platform;
+      properties[2] = 0;
 
-    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
+      */
 
-    /**
-     * create command-queue
-     */
+      CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
 
-    // not supported with NV
-    // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
+      if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
+      /**
+       * create command-queue
+       */
 
-    if (CL_rc == -1) return -1;
+      // not supported with NV
+      // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
+
+      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
+
+      if (CL_rc == -1) return -1;
+    }
 
     /**
      * create input buffers on device : calculate size of fixed memory buffers
@@ -5902,7 +5930,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        if (backend_ctx->nvrtc)
+        if (device_param->is_cuda)
         {
           nvrtcProgram program;
 
@@ -5982,7 +6010,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           if (rc_nvrtcDestroyProgram == -1) return -1;
         }
 
-        if (1) // later just else
+        if (device_param->is_opencl)
         {
           CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
 
@@ -6052,13 +6080,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
+        if (device_param->is_cuda)
+        {
 
-        if (CL_rc == -1) return -1;
+        }
 
-        CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
+        if (device_param->is_opencl)
+        {
+          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
 
-        if (CL_rc == -1) return -1;
+          if (CL_rc == -1) return -1;
+
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
 
       hcfree (kernel_sources[0]);

From a6fa7a2749932de7ea6314de0bb89f37afd3598b Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 2 May 2019 14:58:52 +0200
Subject: [PATCH 20/73] Add support for some first CUDA module loader

---
 include/backend.h  |   5 +
 include/ext_cuda.h |  17 ++
 include/types.h    |   5 +-
 src/Makefile       |   2 +-
 src/backend.c      | 507 +++++++++++++++++++++++++++++++++++++++------
 5 files changed, 475 insertions(+), 61 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index 9323d3880..d5d588f71 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -46,6 +46,11 @@ int hc_cuDeviceGet               (hashcat_ctx_t *hashcat_ctx, CUdevice *device,
 int hc_cuDeviceGetName           (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
 int hc_cuDeviceTotalMem          (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
 int hc_cuDriverGetVersion        (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
+int hc_cuCtxCreate               (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
+int hc_cuCtxDestroy              (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
+int hc_cuModuleLoadDataEx        (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
+int hc_cuModuleUnload            (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
+int hc_cuCtxSetCurrent           (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
index d51fd2286..f48cca490 100644
--- a/include/ext_cuda.h
+++ b/include/ext_cuda.h
@@ -916,6 +916,23 @@ typedef enum CUfunction_attribute_enum {
     CU_FUNC_ATTRIBUTE_MAX
 } CUfunction_attribute;
 
+/**
+ * Context creation flags
+ */
+typedef enum CUctx_flags_enum {
+    CU_CTX_SCHED_AUTO          = 0x00, /**< Automatic scheduling */
+    CU_CTX_SCHED_SPIN          = 0x01, /**< Set spin as default scheduling */
+    CU_CTX_SCHED_YIELD         = 0x02, /**< Set yield as default scheduling */
+    CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
+    CU_CTX_BLOCKING_SYNC       = 0x04, /**< Set blocking synchronization as default scheduling
+                                         *  \deprecated This flag was deprecated as of CUDA 4.0
+                                         *  and was replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. */
+    CU_CTX_SCHED_MASK          = 0x07, /**< Mask covering the CU_CTX_SCHED_* bits (0x01 | 0x02 | 0x04) */
+    CU_CTX_MAP_HOST            = 0x08, /**< Support mapped pinned allocations */
+    CU_CTX_LMEM_RESIZE_TO_MAX  = 0x10, /**< Keep local memory allocation after launch */
+    CU_CTX_FLAGS_MASK          = 0x1f  /**< Mask covering every flag bit defined in this enum */
+} CUctx_flags;
+
 #ifdef _WIN32
 #define CUDAAPI __stdcall
 #else
diff --git a/include/types.h b/include/types.h
index 42a7a1c12..948859cdc 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1246,6 +1246,7 @@ typedef struct hc_device_param
   bool   is_cuda;
 
   CUdevice        cuda_device;
+  CUcontext       cuda_context;
 
   // API: opencl
 
@@ -1263,6 +1264,8 @@ typedef struct hc_device_param
   cl_uint         opencl_device_vendor_id;
   cl_uint         opencl_platform_vendor_id;
 
+  cl_context opencl_context;
+
   cl_kernel  kernel1;
   cl_kernel  kernel12;
   cl_kernel  kernel2;
@@ -1284,8 +1287,6 @@ typedef struct hc_device_param
   cl_kernel  kernel_aux3;
   cl_kernel  kernel_aux4;
 
-  cl_context context;
-
   cl_program program;
   cl_program program_mp;
   cl_program program_amp;
diff --git a/src/Makefile b/src/Makefile
index 3f90fafe0..9ffea7d67 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,7 +4,7 @@
 ##
 
 SHARED                  := 0
-DEBUG                   := 0
+DEBUG                   := 1
 PRODUCTION              := 0
 PRODUCTION_VERSION      := v5.1.0
 ENABLE_BRAIN            := 1
diff --git a/src/backend.c b/src/backend.c
index 47f40e9b0..bd28a087c 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -271,6 +271,94 @@ static bool setup_opencl_device_types_filter (hashcat_ctx_t *hashcat_ctx, const
   return true;
 }
 
+// Probes whether kernel_buf can be compiled with NVRTC for
+// "--gpu-architecture compute_<sm_major><sm_minor>" and whether the resulting
+// PTX is accepted by cuModuleLoadDataEx(). Returns true only if every step succeeds.
+// NOTE: the caller in backend_ctx_devices_init() makes a CUDA context current
+// before calling this. The NVRTC program and all temporary buffers are
+// released on every path once the program has been created.
+static bool cuda_test_instruction (hashcat_ctx_t *hashcat_ctx, const int sm_major, const int sm_minor, const char *kernel_buf)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  NVRTC_PTR *nvrtc = backend_ctx->nvrtc;
+  CUDA_PTR  *cuda  = backend_ctx->cuda;
+
+  nvrtcProgram program;
+
+  const int rc_nvrtcCreateProgram = hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_buf, "test_instruction", 0, NULL, NULL);
+
+  if (rc_nvrtcCreateProgram == -1) return false;
+
+  char *nvrtc_options[3];
+
+  nvrtc_options[0] = "--gpu-architecture";
+
+  hc_asprintf (&nvrtc_options[1], "compute_%d%d", sm_major, sm_minor);
+
+  nvrtc_options[2] = NULL;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcCompileProgram (program, 2, (const char * const *) nvrtc_options);
+
+  hcfree (nvrtc_options[1]);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    // print the build log (best effort), then release everything
+
+    size_t build_log_size = 0;
+
+    hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
+
+    char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+    const int rc_nvrtcGetProgramLog = hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log);
+
+    if (rc_nvrtcGetProgramLog != -1) puts (build_log);
+
+    hcfree (build_log);
+
+    hc_nvrtcDestroyProgram (hashcat_ctx, &program);
+
+    return false;
+  }
+
+  bool rc = false;
+
+  size_t binary_size = 0;
+
+  const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
+
+  if (rc_nvrtcGetPTXSize != -1)
+  {
+    char *binary = (char *) hcmalloc (binary_size);
+
+    const int rc_nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
+
+    if (rc_nvrtcGetPTX != -1)
+    {
+      // raw driver call instead of hc_cuModuleLoadDataEx(): a rejected module is not logged as an error
+
+      CUmodule cuda_module;
+
+      const CUresult CU_err = cuda->cuModuleLoadDataEx (&cuda_module, binary, 0, NULL, NULL);
+
+      if (CU_err == CUDA_SUCCESS)
+      {
+        if (hc_cuModuleUnload (hashcat_ctx, cuda_module) != -1) rc = true;
+      }
+    }
+
+    hcfree (binary);
+  }
+
+  // the program is destroyed no matter how the steps above ended
+
+  if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) rc = false;
+
+  return rc;
+}
+
 static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, const char *kernel_buf)
 {
   int CL_rc;
@@ -1052,6 +1140,141 @@ int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion)
   return 0;
 }
 
+int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev)
+{
+  // cuCtxCreate() via backend_ctx->cuda: 0 on success, -1 (after logging) on failure
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuCtxCreate (pctx, flags, dev);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+    // log the numeric result code only when no message text can be obtained
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuCtxCreate(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuCtxCreate(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx)
+{
+  // cuCtxDestroy() via backend_ctx->cuda: 0 on success, -1 (after logging) on failure
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuCtxDestroy (ctx);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+    // log the numeric result code only when no message text can be obtained
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuCtxDestroy(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuCtxDestroy(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues)
+{
+  // cuModuleLoadDataEx() via backend_ctx->cuda: 0 on success, -1 (after logging) on failure
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuModuleLoadDataEx (module, image, numOptions, options, optionValues);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+    // log the numeric result code only when no message text can be obtained
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod)
+{
+  // cuModuleUnload() via backend_ctx->cuda: 0 on success, -1 (after logging) on failure
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuModuleUnload (hmod);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+    // log the numeric result code only when no message text can be obtained
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuModuleUnload(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuModuleUnload(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx)
+{
+  // cuCtxSetCurrent() via backend_ctx->cuda: 0 on success, -1 (after logging) on failure
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuCtxSetCurrent (ctx);
+
+  if (rc != CUDA_SUCCESS)
+  {
+    const char *err_text = NULL;
+    // log the numeric result code only when no message text can be obtained
+    if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %d", rc);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %s", err_text);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -4077,7 +4300,177 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->skipped = true;
       }
 
+      if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
+      {
+        need_nvml = true;
 
+        #if defined (_WIN) || defined (__CYGWIN__)
+        need_nvapi = true;
+        #endif
+      }
+
+      // CPU burning loop damper
+      // Value is given as number between 0-100
+      // By default 8%
+      // in theory not needed with CUDA
+
+      device_param->spin_damp = (double) user_options->spin_damp / 100;
+
+      // common driver check
+
+      if (device_param->skipped == false)
+      {
+        if ((user_options->force == false) && (user_options->backend_info == false))
+        {
+          // CUDA does not support query nvidia driver version, therefore no driver checks here
+          // IF needed, could be retrieved using nvmlSystemGetDriverVersion()
+
+          if (device_param->sm_major < 5)
+          {
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
+          }
+
+          if (device_param->kernel_exec_timeout != 0)
+          {
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
+            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
+          }
+        }
+
+        /**
+         * activate device
+         */
+
+        cuda_devices_active++;
+      }
+
+      CUcontext cuda_context;
+
+      const int rc_cuCtxCreate = hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
+
+      if (rc_cuCtxCreate == -1) return -1;
+
+      const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, cuda_context);
+
+      if (rc_cuCtxSetCurrent == -1) return -1;
+
+      const bool has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+      device_param->has_bfe = has_bfe;
+
+      const bool has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+      device_param->has_lop3 = has_lop3;
+
+      const bool has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
+
+      device_param->has_mov64 = has_mov64;
+
+      const bool has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+      device_param->has_prmt = has_prmt;
+
+
+
+      const int rc_cuCtxDestroy = hc_cuCtxDestroy (hashcat_ctx, cuda_context);
+
+      if (rc_cuCtxDestroy == -1) return -1;
+
+
+
+      /*
+
+
+        const bool has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
+
+        device_param->has_bfe = has_bfe;
+
+
+
+        // device_available_mem
+
+        #define MAX_ALLOC_CHECKS_CNT  8192
+        #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+
+        device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+
+        // OK, so the problem here is the following:
+        // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
+        // but there's no way to ask for available memory on the device.
+        // In combination, most OpenCL runtimes implementation of clCreateBuffer()
+        // are doing so called lazy memory allocation on the device.
+        // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
+        // running on the host we end up with an error type of this:
+        // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
+        // The clEnqueueNDRangeKernel() is because of the lazy allocation
+        // The best way to workaround this problem is if we would be able to ask for available memory,
+        // The idea here is to try to evaluate available memory by allocating it till it errors
+
+        cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
+
+        u64 c;
+
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+          cl_int CL_err;
+
+          OCL_PTR *ocl = backend_ctx->ocl;
+
+          tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
+
+          if (CL_err != CL_SUCCESS)
+          {
+            c--;
+
+            break;
+          }
+
+          // transfer only a few byte should be enough to force the runtime to actually allocate the memory
+
+          u8 tmp_host[8];
+
+          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+          if (CL_err != CL_SUCCESS) break;
+
+          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+          if (CL_err != CL_SUCCESS) break;
+
+          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+          if (CL_err != CL_SUCCESS) break;
+
+          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
+
+          if (CL_err != CL_SUCCESS) break;
+        }
+
+        device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
+
+        // clean up
+
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+          if (tmp_device[c] != NULL)
+          {
+            CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]);
+
+            if (CL_rc == -1) return -1;
+          }
+        }
+
+        hcfree (tmp_device);
+
+
+      */
     }
   }
 
@@ -4928,10 +5321,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
 
         hc_clReleaseContext (hashcat_ctx, context);
-
-        // next please
-
-
       }
     }
   }
@@ -5367,8 +5756,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
   for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
-    int CL_rc = CL_SUCCESS;
-
     /**
      * host buffer
      */
@@ -5434,7 +5821,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+            const int CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -5450,7 +5837,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
+            const int CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -5625,7 +6012,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
+      int CU_rc = hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
 
+      if (CU_rc == -1) return -1;
     }
 
     if (device_param->is_opencl == true)
@@ -5637,10 +6026,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       properties[1] = (cl_context_properties) device_param->opencl_platform;
       properties[2] = 0;
 
-      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
+      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
       */
 
-      CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->context);
+      int CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
 
       if (CL_rc == -1) return -1;
 
@@ -5651,7 +6040,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       // not supported with NV
       // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
 
-      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
+      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
 
       if (CL_rc == -1) return -1;
     }
@@ -6012,7 +6401,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->is_opencl)
         {
-          CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program);
 
           if (CL_rc == -1) return -1;
 
@@ -6087,7 +6476,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->is_opencl)
         {
-          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
 
           if (CL_rc == -1) return -1;
 
@@ -6175,7 +6564,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_mp);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program_mp);
 
           if (CL_rc == -1) return -1;
 
@@ -6240,7 +6629,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_mp);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_mp);
 
           if (CL_rc == -1) return -1;
 
@@ -6330,7 +6719,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_amp);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program_amp);
 
           if (CL_rc == -1) return -1;
 
@@ -6395,7 +6784,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_amp);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_amp);
 
           if (CL_rc == -1) return -1;
 
@@ -6427,25 +6816,27 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * global buffers
      */
 
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->d_plain_bufs);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->d_digests_buf);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->d_digests_shown);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->d_salt_bufs);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->d_result);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra0_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra1_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra2_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra3_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->d_st_digests_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->d_st_salts_buf);   if (CL_rc == -1) return -1;
+    int CL_rc;
+
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_a);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_b);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_c);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_d);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_a);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_b);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_c);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_d);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->d_plain_bufs);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->d_digests_buf);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->d_digests_shown);  if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->d_salt_bufs);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->d_result);         if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra0_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra1_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra2_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra3_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->d_st_digests_buf); if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->d_st_salts_buf);   if (CL_rc == -1) return -1;
 
     CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
     CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
@@ -6464,37 +6855,37 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (user_options->slow_candidates == true)
     {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
     }
     else
     {
       if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->d_rules);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->d_rules);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
 
         CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs_c);        if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs);          if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs_c);        if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs);            if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs_c);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->d_tm_c);           if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs);            if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs_c);          if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->d_tm_c);           if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
       }
     }
 
     if (size_esalts)
     {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->d_esalt_bufs);
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->d_esalt_bufs);
 
       if (CL_rc == -1) return -1;
 
@@ -6510,7 +6901,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (size_esalts)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->d_st_esalts_buf);
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->d_st_esalts_buf);
 
         if (CL_rc == -1) return -1;
 
@@ -7642,12 +8033,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     device_param->size_brain_link_out = size_brain_link_out;
     #endif
 
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->d_pws_buf);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->d_pws_amp_buf);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->d_pws_comp_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->d_pws_idx);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->d_tmps);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->d_hooks);        if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->d_pws_buf);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->d_pws_amp_buf);  if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->d_pws_comp_buf); if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->d_pws_idx);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->d_tmps);         if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->d_hooks);        if (CL_rc == -1) return -1;
 
     CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
     CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
@@ -7866,7 +8257,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->command_queue);
 
-      if (device_param->context)          hc_clReleaseContext (hashcat_ctx, device_param->context);
+      if (device_param->opencl_context)          hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
     }
 
     device_param->pws_comp            = NULL;
@@ -7941,7 +8332,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     device_param->program_mp          = NULL;
     device_param->program_amp         = NULL;
     device_param->command_queue       = NULL;
-    device_param->context             = NULL;
+    device_param->opencl_context             = NULL;
   }
 }
 

From af8e317cf40a26d042d785dbcbf673780ea37445 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 2 May 2019 17:12:59 +0200
Subject: [PATCH 21/73] Begin renaming some OpenCL only variables

---
 include/backend.h |  13 +-
 include/types.h   |   5 +-
 src/autotune.c    |   2 +-
 src/backend.c     | 462 +++++++++++++++++++++++++++++-----------------
 src/hashes.c      |  10 +-
 src/selftest.c    |  26 +--
 6 files changed, 323 insertions(+), 195 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index d5d588f71..ca3f8548e 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -39,18 +39,23 @@ int hc_nvrtcGetProgramLog        (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog,
 int hc_nvrtcGetPTXSize           (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
 int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
 
-int hc_cuInit                    (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
+int hc_cuCtxCreate               (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
+int hc_cuCtxDestroy              (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
+int hc_cuCtxSetCurrent           (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
 int hc_cuDeviceGetAttribute      (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
 int hc_cuDeviceGetCount          (hashcat_ctx_t *hashcat_ctx, int *count);
 int hc_cuDeviceGet               (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
 int hc_cuDeviceGetName           (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
 int hc_cuDeviceTotalMem          (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
 int hc_cuDriverGetVersion        (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
-int hc_cuCtxCreate               (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
-int hc_cuCtxDestroy              (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
+int hc_cuInit                    (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
+int hc_cuMemAlloc                (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
+int hc_cuMemcpyDtoD              (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+int hc_cuMemcpyDtoH              (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+int hc_cuMemcpyHtoD              (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
+int hc_cuMemFree                 (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
 int hc_cuModuleLoadDataEx        (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
 int hc_cuModuleUnload            (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
-int hc_cuCtxSetCurrent           (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/types.h b/include/types.h
index 948859cdc..c01bcd72e 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1264,7 +1264,8 @@ typedef struct hc_device_param
   cl_uint         opencl_device_vendor_id;
   cl_uint         opencl_platform_vendor_id;
 
-  cl_context opencl_context;
+  cl_context        opencl_context;
+  cl_command_queue  opencl_command_queue;
 
   cl_kernel  kernel1;
   cl_kernel  kernel12;
@@ -1291,8 +1292,6 @@ typedef struct hc_device_param
   cl_program program_mp;
   cl_program program_amp;
 
-  cl_command_queue command_queue;
-
   cl_mem  d_pws_buf;
   cl_mem  d_pws_amp_buf;
   cl_mem  d_pws_comp_buf;
diff --git a/src/autotune.c b/src/autotune.c
index ab383e63f..390c0e463 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -118,7 +118,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     {
       if (straight_ctx->kernel_rules_cnt > 1)
       {
-        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
+        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
diff --git a/src/backend.c b/src/backend.c
index bd28a087c..3c234d3a9 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1275,6 +1275,141 @@ int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx)
   return 0;
 }
 
+int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemAlloc (dptr, bytesize);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuMemAlloc(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuMemAlloc(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemFree (dptr);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuMemFree(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuMemFree(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemcpyDtoH (dstHost, srcDevice, ByteCount);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemcpyDtoD (dstDevice, srcDevice, ByteCount);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemcpyHtoD (dstDevice, srcHost, ByteCount);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -1954,7 +2089,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 
   int CL_rc;
 
-  CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -1964,7 +2099,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 
   if (cnt > 0)
   {
-    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
   }
@@ -2015,7 +2150,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -2058,7 +2193,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     if (run_init == true)
     {
-      CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_amp_buf, device_param->d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
+      CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_amp_buf, device_param->d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
@@ -2082,13 +2217,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook12 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -2154,13 +2289,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook23 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -2393,7 +2528,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     const size_t global_work_size[3] = { num_elements,  32, 1 };
     const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
 
     if (CL_rc == -1) return -1;
   }
@@ -2426,12 +2561,12 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     const size_t global_work_size[3] = { num_elements,   1, 1 };
     const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);
 
     if (CL_rc == -1) return -1;
   }
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2538,7 +2673,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2611,15 +2746,15 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   const size_t global_work_size[3] = { num_elements,   1, 1 };
   const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2639,15 +2774,15 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   int CL_rc;
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2675,15 +2810,15 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   const size_t global_work_size[3] = { num_elements,    1, 1 };
   const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2715,15 +2850,15 @@ int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2757,15 +2892,15 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
     const size_t global_work_size[3] = { num_elements,   1, 1 };
     const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
     if (CL_rc == -1) return -1;
   }
@@ -2781,7 +2916,7 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 
     int CL_rc;
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
   }
@@ -2810,15 +2945,15 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
 
   if (CL_rc == -1) return -1;
 
@@ -2855,7 +2990,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
   {
     int CL_rc;
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
 
@@ -2865,7 +3000,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
     if (off)
     {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -2880,7 +3015,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
     {
       int CL_rc;
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
@@ -2890,7 +3025,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (off)
       {
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -2939,7 +3074,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         int CL_rc;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -2949,7 +3084,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (off)
         {
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -2964,7 +3099,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           int CL_rc;
 
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
 
@@ -2974,7 +3109,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -2987,7 +3122,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           int CL_rc;
 
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
 
@@ -2997,7 +3132,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -3196,7 +3331,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
       {
         if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
-          const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
+          const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -3302,7 +3437,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3318,7 +3453,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3334,7 +3469,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3441,7 +3576,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3457,7 +3592,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3475,7 +3610,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
+          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -4282,11 +4417,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->skipped = true;
       }
 
-      // device_local_mem_type
+      // some attributes have to be hardcoded because they are used for instance in the build options
 
-      cl_device_local_mem_type device_local_mem_type = CL_LOCAL;
+      device_param->device_local_mem_type     = CL_LOCAL;
+      device_param->opencl_device_type        = CL_DEVICE_TYPE_GPU;
+      device_param->opencl_device_vendor_id   = VENDOR_ID_NV;
+      device_param->opencl_platform_vendor_id = VENDOR_ID_NV;
 
-      device_param->device_local_mem_type = device_local_mem_type;
+      // or in the cached kernel checksum
+
+      device_param->opencl_device_version     = "";
+      device_param->opencl_driver_version     = "";
+
+      // or just to make sure they are not NULL
+
+      device_param->opencl_device_vendor     = "";
+      device_param->opencl_device_c_version  = "";
 
       // skipped
 
@@ -4373,104 +4519,78 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->has_prmt = has_prmt;
 
+      // device_available_mem
 
+      #define MAX_ALLOC_CHECKS_CNT  8192
+      #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+
+      device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+
+      // OK, so the problem here is the following (explanation inherited from the OpenCL code path):
+      // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
+      // but there's no way to ask for the available memory on the device.
+      // In combination, most OpenCL runtimes' implementations of clCreateBuffer()
+      // do so-called lazy memory allocation on the device.
+      // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
+      // running on the host, we end up with an error of this type:
+      // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
+      // The error only shows up in clEnqueueNDRangeKernel() because of the lazy allocation.
+      // The best workaround would be to ask for the available memory directly.
+      // The idea here is to estimate the available memory by allocating it until it errors.
+
+      CUdeviceptr *tmp_device = (CUdeviceptr *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (CUdeviceptr));
+
+      u64 c;
+      // probe: allocate MAX_ALLOC_CHECKS_SIZE chunks until cuMemAlloc() fails or device_global_mem is reached
+      for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+      {
+        if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+        CUresult rc_tmp;
+
+        CUDA_PTR *cuda = backend_ctx->cuda;
+
+        rc_tmp = cuda->cuMemAlloc (&tmp_device[c], MAX_ALLOC_CHECKS_SIZE);
+
+        if (rc_tmp != CUDA_SUCCESS)
+        {
+          // give back one chunk as safety margin; c is unsigned, so it must not be decremented below zero
+          if (c > 0) c--;
+          break;
+        }
+
+        char tmp_host[8];
+        // copy a few bytes in both directions to check that the chunk is actually usable
+        rc_tmp = cuda->cuMemcpyDtoH (tmp_host, tmp_device[c], sizeof (tmp_host));
+
+        if (rc_tmp != CUDA_SUCCESS) break;
+
+        rc_tmp = cuda->cuMemcpyHtoD (tmp_device[c], tmp_host, sizeof (tmp_host));
+
+        if (rc_tmp != CUDA_SUCCESS) break;
+      }
+
+      device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
+
+      // clean up: free every chunk that was allocated, slots never allocated are still zero from hccalloc()
+
+      for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+      {
+        if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+        if (tmp_device[c] != 0)
+        {
+          const int rc_cuMemFree = hc_cuMemFree (hashcat_ctx, tmp_device[c]);
+
+          if (rc_cuMemFree == -1) return -1;
+        }
+      }
+
+      hcfree (tmp_device);
 
       const int rc_cuCtxDestroy = hc_cuCtxDestroy (hashcat_ctx, cuda_context);
 
       if (rc_cuCtxDestroy == -1) return -1;
-
-
-
-      /*
-
-
-        const bool has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
-
-        device_param->has_bfe = has_bfe;
-
-
-
-        // device_available_mem
-
-        #define MAX_ALLOC_CHECKS_CNT  8192
-        #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
-
-        device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
-
-        // OK, so the problem here is the following:
-        // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
-        // but there's no way to ask for available memory on the device.
-        // In combination, most OpenCL runtimes implementation of clCreateBuffer()
-        // are doing so called lazy memory allocation on the device.
-        // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
-        // running on the host we end up with an error type of this:
-        // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
-        // The clEnqueueNDRangeKernel() is because of the lazy allocation
-        // The best way to workaround this problem is if we would be able to ask for available memory,
-        // The idea here is to try to evaluate available memory by allocating it till it errors
-
-        cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
-
-        u64 c;
-
-        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-        {
-          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
-
-          cl_int CL_err;
-
-          OCL_PTR *ocl = backend_ctx->ocl;
-
-          tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
-
-          if (CL_err != CL_SUCCESS)
-          {
-            c--;
-
-            break;
-          }
-
-          // transfer only a few byte should be enough to force the runtime to actually allocate the memory
-
-          u8 tmp_host[8];
-
-          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-
-          CL_err = ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL);
-
-          if (CL_err != CL_SUCCESS) break;
-        }
-
-        device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
-
-        // clean up
-
-        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-        {
-          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
-
-          if (tmp_device[c] != NULL)
-          {
-            CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]);
-
-            if (CL_rc == -1) return -1;
-          }
-        }
-
-        hcfree (tmp_device);
-
-
-      */
     }
   }
 
@@ -6038,9 +6158,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
        */
 
       // not supported with NV
-      // device_param->command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
+      // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
 
-      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->command_queue);
+      CL_rc = hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue);
 
       if (CL_rc == -1) return -1;
     }
@@ -6200,8 +6320,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     char *device_name_chksum        = (char *) hcmalloc (HCBUFSIZ_TINY);
     char *device_name_chksum_amp_mp = (char *) hcmalloc (HCBUFSIZ_TINY);
 
-    const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s-%d-%u",
+    const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s-%d-%u",
       backend_ctx->comptime,
+      backend_ctx->cuda_driver_version,
+      device_param->is_opencl,
       device_param->opencl_platform_vendor_id,
       device_param->device_name,
       device_param->opencl_device_version,
@@ -6209,8 +6331,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->vector_width,
       hashconfig->kern_type);
 
-    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%u-%s-%s-%s",
+    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s",
       backend_ctx->comptime,
+      backend_ctx->cuda_driver_version,
+      device_param->is_opencl,
       device_param->opencl_platform_vendor_id,
       device_param->device_name,
       device_param->opencl_device_version,
@@ -6838,16 +6962,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->d_st_digests_buf); if (CL_rc == -1) return -1;
     CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->d_st_salts_buf);   if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s1_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s2_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s2_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s2_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bitmap_s2_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_digests_buf,     CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_salt_bufs,       CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_digests_buf,     CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_salt_bufs,       CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
 
     /**
      * special buffers
@@ -6864,7 +6988,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->d_rules);   if (CL_rc == -1) return -1;
         CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
       {
@@ -6889,15 +7013,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
 
     if (hashconfig->st_hash != NULL)
     {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
 
       if (size_esalts)
       {
@@ -6905,7 +7029,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -8255,7 +8379,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->program_mp)       hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
       if (device_param->program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->program_amp);
 
-      if (device_param->command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->command_queue);
+      if (device_param->opencl_command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
 
       if (device_param->opencl_context)          hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
     }
@@ -8331,7 +8455,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     device_param->program             = NULL;
     device_param->program_mp          = NULL;
     device_param->program_amp         = NULL;
-    device_param->command_queue       = NULL;
+    device_param->opencl_command_queue       = NULL;
     device_param->opencl_context             = NULL;
   }
 }
@@ -8478,8 +8602,8 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
       for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
       for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
     }
   }
 
@@ -8523,8 +8647,8 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
       for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
       for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
     }
   }
 
diff --git a/src/hashes.c b/src/hashes.c
index 5306d2341..2cea080b9 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -309,7 +309,7 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
   {
     tmps = hcmalloc (hashconfig->tmp_size);
 
-    hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
+    hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
   }
 
   // hash
@@ -462,7 +462,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   cl_int CL_err;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
   if (CL_err != CL_SUCCESS)
   {
@@ -483,7 +483,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     plain_t *cracked = (plain_t *) hccalloc (num_cracked, sizeof (plain_t));
 
-    CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
+    CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS)
     {
@@ -553,7 +553,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
       memset (hashes->digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (u32));
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
+      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
 
       if (CL_err != CL_SUCCESS)
       {
@@ -565,7 +565,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     num_cracked = 0;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS)
     {
diff --git a/src/selftest.c b/src/selftest.c
index 9f97aff2c..2f0535501 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -57,7 +57,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     pw.pw_len = (u32) pw_len;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS) return -1;
   }
@@ -84,7 +84,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           uppercase ((u8 *) pw_ptr, pw.pw_len);
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
       }
@@ -136,11 +136,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           comb_ptr[comb.pw_len] = 0x80;
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
       }
@@ -165,7 +165,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
           pw.pw_len = (u32) pw_len;
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
         }
@@ -208,7 +208,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             bf.i = byte_swap_32 (bf.i);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
 
@@ -296,7 +296,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
 
@@ -316,7 +316,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       pw.pw_len = (u32) pw_len;
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
       if (CL_err != CL_SUCCESS) return -1;
     }
@@ -372,13 +372,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
       module_ctx->module_hook12 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -411,13 +411,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
       module_ctx->module_hook23 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -492,7 +492,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   u32 num_cracked;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
   if (CL_err != CL_SUCCESS) return -1;
 

From 50a6e720ca98c5140e5f91decd2b0812cc3deb6f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 2 May 2019 17:30:46 +0200
Subject: [PATCH 22/73] More OpenCL variables rename

---
 include/types.h | 148 ++++----
 src/Makefile    |   2 +-
 src/autotune.c  |  12 +-
 src/backend.c   | 962 ++++++++++++++++++++++++------------------------
 src/hashes.c    |  10 +-
 src/selftest.c  |  58 +--
 6 files changed, 595 insertions(+), 597 deletions(-)

diff --git a/include/types.h b/include/types.h
index c01bcd72e..d38d20191 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1243,93 +1243,91 @@ typedef struct hc_device_param
 
   // API: cuda
 
-  bool   is_cuda;
+  bool              is_cuda;
 
-  CUdevice        cuda_device;
-  CUcontext       cuda_context;
+  CUdevice          cuda_device;
+  CUcontext         cuda_context;
 
   // API: opencl
 
-  bool   is_opencl;
+  bool              is_opencl;
 
-  cl_device_id    opencl_device;
+  char             *opencl_driver_version;
+  char             *opencl_device_vendor;
+  char             *opencl_device_version;
+  char             *opencl_device_c_version;
 
-  char   *opencl_driver_version;
-  char   *opencl_device_vendor;
-  char   *opencl_device_version;
-  char   *opencl_device_c_version;
-
-  //cl_platform_id  opencl_platform;
-  cl_device_type  opencl_device_type;
-  cl_uint         opencl_device_vendor_id;
-  cl_uint         opencl_platform_vendor_id;
+  cl_device_type    opencl_device_type;
+  cl_uint           opencl_device_vendor_id;
+  cl_uint           opencl_platform_vendor_id;
 
+  cl_device_id      opencl_device;
   cl_context        opencl_context;
   cl_command_queue  opencl_command_queue;
 
-  cl_kernel  kernel1;
-  cl_kernel  kernel12;
-  cl_kernel  kernel2;
-  cl_kernel  kernel23;
-  cl_kernel  kernel3;
-  cl_kernel  kernel4;
-  cl_kernel  kernel_init2;
-  cl_kernel  kernel_loop2;
-  cl_kernel  kernel_mp;
-  cl_kernel  kernel_mp_l;
-  cl_kernel  kernel_mp_r;
-  cl_kernel  kernel_amp;
-  cl_kernel  kernel_tm;
-  cl_kernel  kernel_memset;
-  cl_kernel  kernel_atinit;
-  cl_kernel  kernel_decompress;
-  cl_kernel  kernel_aux1;
-  cl_kernel  kernel_aux2;
-  cl_kernel  kernel_aux3;
-  cl_kernel  kernel_aux4;
+  cl_program        opencl_program;
+  cl_program        opencl_program_mp;
+  cl_program        opencl_program_amp;
 
-  cl_program program;
-  cl_program program_mp;
-  cl_program program_amp;
+  cl_kernel         opencl_kernel1;
+  cl_kernel         opencl_kernel12;
+  cl_kernel         opencl_kernel2;
+  cl_kernel         opencl_kernel23;
+  cl_kernel         opencl_kernel3;
+  cl_kernel         opencl_kernel4;
+  cl_kernel         opencl_kernel_init2;
+  cl_kernel         opencl_kernel_loop2;
+  cl_kernel         opencl_kernel_mp;
+  cl_kernel         opencl_kernel_mp_l;
+  cl_kernel         opencl_kernel_mp_r;
+  cl_kernel         opencl_kernel_amp;
+  cl_kernel         opencl_kernel_tm;
+  cl_kernel         opencl_kernel_memset;
+  cl_kernel         opencl_kernel_atinit;
+  cl_kernel         opencl_kernel_decompress;
+  cl_kernel         opencl_kernel_aux1;
+  cl_kernel         opencl_kernel_aux2;
+  cl_kernel         opencl_kernel_aux3;
+  cl_kernel         opencl_kernel_aux4;
 
-  cl_mem  d_pws_buf;
-  cl_mem  d_pws_amp_buf;
-  cl_mem  d_pws_comp_buf;
-  cl_mem  d_pws_idx;
-  cl_mem  d_words_buf_l;
-  cl_mem  d_words_buf_r;
-  cl_mem  d_rules;
-  cl_mem  d_rules_c;
-  cl_mem  d_combs;
-  cl_mem  d_combs_c;
-  cl_mem  d_bfs;
-  cl_mem  d_bfs_c;
-  cl_mem  d_tm_c;
-  cl_mem  d_bitmap_s1_a;
-  cl_mem  d_bitmap_s1_b;
-  cl_mem  d_bitmap_s1_c;
-  cl_mem  d_bitmap_s1_d;
-  cl_mem  d_bitmap_s2_a;
-  cl_mem  d_bitmap_s2_b;
-  cl_mem  d_bitmap_s2_c;
-  cl_mem  d_bitmap_s2_d;
-  cl_mem  d_plain_bufs;
-  cl_mem  d_digests_buf;
-  cl_mem  d_digests_shown;
-  cl_mem  d_salt_bufs;
-  cl_mem  d_esalt_bufs;
-  cl_mem  d_tmps;
-  cl_mem  d_hooks;
-  cl_mem  d_result;
-  cl_mem  d_extra0_buf;
-  cl_mem  d_extra1_buf;
-  cl_mem  d_extra2_buf;
-  cl_mem  d_extra3_buf;
-  cl_mem  d_root_css_buf;
-  cl_mem  d_markov_css_buf;
-  cl_mem  d_st_digests_buf;
-  cl_mem  d_st_salts_buf;
-  cl_mem  d_st_esalts_buf;
+  cl_mem            opencl_d_pws_buf;
+  cl_mem            opencl_d_pws_amp_buf;
+  cl_mem            opencl_d_pws_comp_buf;
+  cl_mem            opencl_d_pws_idx;
+  cl_mem            opencl_d_words_buf_l;
+  cl_mem            opencl_d_words_buf_r;
+  cl_mem            opencl_d_rules;
+  cl_mem            opencl_d_rules_c;
+  cl_mem            opencl_d_combs;
+  cl_mem            opencl_d_combs_c;
+  cl_mem            opencl_d_bfs;
+  cl_mem            opencl_d_bfs_c;
+  cl_mem            opencl_d_tm_c;
+  cl_mem            opencl_d_bitmap_s1_a;
+  cl_mem            opencl_d_bitmap_s1_b;
+  cl_mem            opencl_d_bitmap_s1_c;
+  cl_mem            opencl_d_bitmap_s1_d;
+  cl_mem            opencl_d_bitmap_s2_a;
+  cl_mem            opencl_d_bitmap_s2_b;
+  cl_mem            opencl_d_bitmap_s2_c;
+  cl_mem            opencl_d_bitmap_s2_d;
+  cl_mem            opencl_d_plain_bufs;
+  cl_mem            opencl_d_digests_buf;
+  cl_mem            opencl_d_digests_shown;
+  cl_mem            opencl_d_salt_bufs;
+  cl_mem            opencl_d_esalt_bufs;
+  cl_mem            opencl_d_tmps;
+  cl_mem            opencl_d_hooks;
+  cl_mem            opencl_d_result;
+  cl_mem            opencl_d_extra0_buf;
+  cl_mem            opencl_d_extra1_buf;
+  cl_mem            opencl_d_extra2_buf;
+  cl_mem            opencl_d_extra3_buf;
+  cl_mem            opencl_d_root_css_buf;
+  cl_mem            opencl_d_markov_css_buf;
+  cl_mem            opencl_d_st_digests_buf;
+  cl_mem            opencl_d_st_salts_buf;
+  cl_mem            opencl_d_st_esalts_buf;
 
 } hc_device_param_t;
 
diff --git a/src/Makefile b/src/Makefile
index 9ffea7d67..3f90fafe0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,7 +4,7 @@
 ##
 
 SHARED                  := 0
-DEBUG                   := 1
+DEBUG                   := 0
 PRODUCTION              := 0
 PRODUCTION_VERSION      := v5.1.0
 ENABLE_BRAIN            := 1
diff --git a/src/autotune.c b/src/autotune.c
index 390c0e463..a679ccbec 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -105,7 +105,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   int CL_rc;
 
-  CL_rc = run_kernel_atinit (hashcat_ctx, device_param, device_param->d_pws_buf, kernel_power_max);
+  CL_rc = run_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
 
   if (CL_rc == -1) return -1;
 
@@ -118,7 +118,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     {
       if (straight_ctx->kernel_rules_cnt > 1)
       {
-        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
+        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -230,21 +230,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // reset them fake words
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_pws_buf, 0, device_param->size_pws);
+  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws);
 
   if (CL_rc == -1) return -1;
 
   // reset other buffers in case autotune cracked something
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_plain_bufs, 0, device_param->size_plains);
+  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_digests_shown, 0, device_param->size_shown);
+  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_result, 0, device_param->size_results);
+  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results);
 
   if (CL_rc == -1) return -1;
 
diff --git a/src/backend.c b/src/backend.c
index 3c234d3a9..dca1e6e7b 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2089,7 +2089,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 
   int CL_rc;
 
-  CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
+  CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
 
   if (CL_rc == -1) return -1;
 
@@ -2099,7 +2099,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 
   if (cnt > 0)
   {
-    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
   }
@@ -2142,7 +2142,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         {
           const u32 size_tm = 32 * sizeof (bs_word_t);
 
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tm_c, size_tm);
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm);
 
           if (CL_rc == -1) return -1;
 
@@ -2150,7 +2150,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -2193,7 +2193,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     if (run_init == true)
     {
-      CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_amp_buf, device_param->d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
+      CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
@@ -2217,13 +2217,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook12 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -2289,13 +2289,13 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook23 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -2440,51 +2440,51 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
   switch (kern_run)
   {
     case KERN_RUN_1:
-      kernel          = device_param->kernel1;
+      kernel          = device_param->opencl_kernel1;
       kernel_threads  = device_param->kernel_wgs1;
       break;
     case KERN_RUN_12:
-      kernel          = device_param->kernel12;
+      kernel          = device_param->opencl_kernel12;
       kernel_threads  = device_param->kernel_wgs12;
       break;
     case KERN_RUN_2:
-      kernel          = device_param->kernel2;
+      kernel          = device_param->opencl_kernel2;
       kernel_threads  = device_param->kernel_wgs2;
       break;
     case KERN_RUN_23:
-      kernel          = device_param->kernel23;
+      kernel          = device_param->opencl_kernel23;
       kernel_threads  = device_param->kernel_wgs23;
       break;
     case KERN_RUN_3:
-      kernel          = device_param->kernel3;
+      kernel          = device_param->opencl_kernel3;
       kernel_threads  = device_param->kernel_wgs3;
       break;
     case KERN_RUN_4:
-      kernel          = device_param->kernel4;
+      kernel          = device_param->opencl_kernel4;
       kernel_threads  = device_param->kernel_wgs4;
       break;
     case KERN_RUN_INIT2:
-      kernel          = device_param->kernel_init2;
+      kernel          = device_param->opencl_kernel_init2;
       kernel_threads  = device_param->kernel_wgs_init2;
       break;
     case KERN_RUN_LOOP2:
-      kernel          = device_param->kernel_loop2;
+      kernel          = device_param->opencl_kernel_loop2;
       kernel_threads  = device_param->kernel_wgs_loop2;
       break;
     case KERN_RUN_AUX1:
-      kernel          = device_param->kernel_aux1;
+      kernel          = device_param->opencl_kernel_aux1;
       kernel_threads  = device_param->kernel_wgs_aux1;
       break;
     case KERN_RUN_AUX2:
-      kernel          = device_param->kernel_aux2;
+      kernel          = device_param->opencl_kernel_aux2;
       kernel_threads  = device_param->kernel_wgs_aux2;
       break;
     case KERN_RUN_AUX3:
-      kernel          = device_param->kernel_aux3;
+      kernel          = device_param->opencl_kernel_aux3;
       kernel_threads  = device_param->kernel_wgs_aux3;
       break;
     case KERN_RUN_AUX4:
-      kernel          = device_param->kernel_aux4;
+      kernel          = device_param->opencl_kernel_aux4;
       kernel_threads  = device_param->kernel_wgs_aux4;
       break;
     default:
@@ -2697,15 +2697,15 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   switch (kern_run)
   {
     case KERN_RUN_MP:
-      kernel          = device_param->kernel_mp;
+      kernel          = device_param->opencl_kernel_mp;
       kernel_threads  = device_param->kernel_wgs_mp;
       break;
     case KERN_RUN_MP_R:
-      kernel          = device_param->kernel_mp_r;
+      kernel          = device_param->opencl_kernel_mp_r;
       kernel_threads  = device_param->kernel_wgs_mp_r;
       break;
     case KERN_RUN_MP_L:
-      kernel          = device_param->kernel_mp_l;
+      kernel          = device_param->opencl_kernel_mp_l;
       kernel_threads  = device_param->kernel_wgs_mp_l;
       break;
     default:
@@ -2767,7 +2767,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   const u64 kernel_threads = MIN (num_elements, device_param->kernel_wgs_tm);
 
-  cl_kernel kernel = device_param->kernel_tm;
+  cl_kernel kernel = device_param->opencl_kernel_tm;
 
   const size_t global_work_size[3] = { num_elements,    1, 1 };
   const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
@@ -2799,7 +2799,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-  cl_kernel kernel = device_param->kernel_amp;
+  cl_kernel kernel = device_param->opencl_kernel_amp;
 
   int CL_rc;
 
@@ -2835,7 +2835,7 @@ int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 
   num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-  cl_kernel kernel = device_param->kernel_atinit;
+  cl_kernel kernel = device_param->opencl_kernel_atinit;
 
   const size_t global_work_size[3] = { num_elements,    1, 1 };
   const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
@@ -2881,7 +2881,7 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
 
     num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-    cl_kernel kernel = device_param->kernel_memset;
+    cl_kernel kernel = device_param->opencl_kernel_memset;
 
     int CL_rc;
 
@@ -2934,7 +2934,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-  cl_kernel kernel = device_param->kernel_decompress;
+  cl_kernel kernel = device_param->opencl_kernel_decompress;
 
   const size_t global_work_size[3] = { num_elements,    1, 1 };
   const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
@@ -2990,7 +2990,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
   {
     int CL_rc;
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
     if (CL_rc == -1) return -1;
 
@@ -3000,7 +3000,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
     if (off)
     {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -3015,7 +3015,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
     {
       int CL_rc;
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
@@ -3025,7 +3025,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (off)
       {
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -3074,7 +3074,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         int CL_rc;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
@@ -3084,7 +3084,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (off)
         {
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -3099,7 +3099,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           int CL_rc;
 
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
 
@@ -3109,7 +3109,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -3122,7 +3122,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           int CL_rc;
 
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
 
@@ -3132,7 +3132,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -3331,7 +3331,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
       {
         if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
-          const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
+          const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -3437,7 +3437,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3453,7 +3453,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3469,7 +3469,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3576,7 +3576,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3592,7 +3592,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
 
               if (CL_rc == -1) return -1;
             }
@@ -3610,7 +3610,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
+          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -5453,7 +5453,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
   backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
 
-  if (backend_ctx->backend_devices_cnt == 0)
+  if (backend_ctx->backend_devices_active == 0)
   {
     event_log_error (hashcat_ctx, "No devices found/left.");
 
@@ -6525,17 +6525,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->is_opencl)
         {
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6547,7 +6547,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build == -1) return -1;
 
@@ -6569,13 +6569,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             size_t binary_size;
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
 
             if (CL_rc == -1) return -1;
 
             char *binary = (char *) hcmalloc (binary_size);
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -6600,11 +6600,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->is_opencl)
         {
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program, 1, &device_param->opencl_device, build_options_module_buf, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -6688,17 +6688,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program_mp);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_mp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6710,7 +6710,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build == -1) return -1;
 
@@ -6732,13 +6732,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             size_t binary_size;
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program_mp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
 
             if (CL_rc == -1) return -1;
 
             char *binary = (char *) hcmalloc (binary_size);
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program_mp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -6753,11 +6753,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_mp);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_mp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -6843,17 +6843,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->program_amp);
+          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_amp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           //if (CL_rc == -1) return -1;
 
           size_t build_log_size = 0;
 
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
 
           //if (CL_rc == -1) return -1;
 
@@ -6865,7 +6865,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build_info = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            int CL_rc_build_info = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
             if (CL_rc_build_info == -1) return -1;
 
@@ -6887,13 +6887,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             size_t binary_size;
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program_amp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
 
             if (CL_rc == -1) return -1;
 
             char *binary = (char *) hcmalloc (binary_size);
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->program_amp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
 
             if (CL_rc == -1) return -1;
 
@@ -6908,11 +6908,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->program_amp);
+          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_amp);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
           if (CL_rc == -1) return -1;
         }
@@ -6942,36 +6942,36 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     int CL_rc;
 
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s2_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->d_plain_bufs);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->d_digests_buf);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->d_digests_shown);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->d_salt_bufs);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->d_result);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra0_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra1_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra2_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->d_extra3_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->d_st_digests_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->d_st_salts_buf);   if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->opencl_d_plain_bufs);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->opencl_d_digests_buf);    if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->opencl_d_digests_shown);  if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->opencl_d_salt_bufs);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->opencl_d_result);         if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra0_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra1_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra2_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra3_buf);     if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->opencl_d_st_digests_buf); if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->opencl_d_st_salts_buf);   if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s1_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bitmap_s2_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_digests_buf,     CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_salt_bufs,       CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf,     CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs,       CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
 
     /**
      * special buffers
@@ -6979,57 +6979,57 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (user_options->slow_candidates == true)
     {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
     }
     else
     {
       if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->d_rules);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->d_rules_c); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->opencl_d_rules);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->d_combs_c);        if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs);          if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs_c);        if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs);            if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->d_bfs_c);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->d_tm_c);           if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->d_markov_css_buf); if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs);            if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs_c);          if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->opencl_d_tm_c);           if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
       }
     }
 
     if (size_esalts)
     {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->d_esalt_bufs);
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
 
     if (hashconfig->st_hash != NULL)
     {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
 
       if (size_esalts)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->d_st_esalts_buf);
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
@@ -7051,30 +7051,30 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     device_param->kernel_params_buf32[33] = 0; // combs_mode
     device_param->kernel_params_buf64[34] = 0; // gid_max
 
-    device_param->kernel_params[ 0] = NULL; // &device_param->d_pws_buf;
-    device_param->kernel_params[ 1] = &device_param->d_rules_c;
-    device_param->kernel_params[ 2] = &device_param->d_combs_c;
-    device_param->kernel_params[ 3] = &device_param->d_bfs_c;
-    device_param->kernel_params[ 4] = NULL; // &device_param->d_tmps;
-    device_param->kernel_params[ 5] = NULL; // &device_param->d_hooks;
-    device_param->kernel_params[ 6] = &device_param->d_bitmap_s1_a;
-    device_param->kernel_params[ 7] = &device_param->d_bitmap_s1_b;
-    device_param->kernel_params[ 8] = &device_param->d_bitmap_s1_c;
-    device_param->kernel_params[ 9] = &device_param->d_bitmap_s1_d;
-    device_param->kernel_params[10] = &device_param->d_bitmap_s2_a;
-    device_param->kernel_params[11] = &device_param->d_bitmap_s2_b;
-    device_param->kernel_params[12] = &device_param->d_bitmap_s2_c;
-    device_param->kernel_params[13] = &device_param->d_bitmap_s2_d;
-    device_param->kernel_params[14] = &device_param->d_plain_bufs;
-    device_param->kernel_params[15] = &device_param->d_digests_buf;
-    device_param->kernel_params[16] = &device_param->d_digests_shown;
-    device_param->kernel_params[17] = &device_param->d_salt_bufs;
-    device_param->kernel_params[18] = &device_param->d_esalt_bufs;
-    device_param->kernel_params[19] = &device_param->d_result;
-    device_param->kernel_params[20] = &device_param->d_extra0_buf;
-    device_param->kernel_params[21] = &device_param->d_extra1_buf;
-    device_param->kernel_params[22] = &device_param->d_extra2_buf;
-    device_param->kernel_params[23] = &device_param->d_extra3_buf;
+    device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf;
+    device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c;
+    device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c;
+    device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c;
+    device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps;
+    device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks;
+    device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a;
+    device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b;
+    device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c;
+    device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d;
+    device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a;
+    device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b;
+    device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c;
+    device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d;
+    device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs;
+    device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
+    device_param->kernel_params[16] = &device_param->opencl_d_digests_shown;
+    device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
+    device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
+    device_param->kernel_params[19] = &device_param->opencl_d_result;
+    device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf;
+    device_param->kernel_params[21] = &device_param->opencl_d_extra1_buf;
+    device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf;
+    device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf;
     device_param->kernel_params[24] = &device_param->kernel_params_buf32[24];
     device_param->kernel_params[25] = &device_param->kernel_params_buf32[25];
     device_param->kernel_params[26] = &device_param->kernel_params_buf32[26];
@@ -7101,24 +7101,24 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
       {
-        device_param->kernel_params_mp[0] = &device_param->d_combs;
+        device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
       }
       else
       {
         if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
         {
-          device_param->kernel_params_mp[0] = &device_param->d_combs;
+          device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
         }
         else
         {
           device_param->kernel_params_mp[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                                    // ? &device_param->d_pws_buf
-                                                    // : &device_param->d_pws_amp_buf;
+                                                    // ? &device_param->opencl_d_pws_buf
+                                                    // : &device_param->opencl_d_pws_amp_buf;
         }
       }
 
-      device_param->kernel_params_mp[1] = &device_param->d_root_css_buf;
-      device_param->kernel_params_mp[2] = &device_param->d_markov_css_buf;
+      device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf;
+      device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf;
       device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3];
       device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4];
       device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5];
@@ -7135,10 +7135,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_mp_l_buf64[9] = 0;
 
       device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                                  // ? &device_param->d_pws_buf
-                                                  // : &device_param->d_pws_amp_buf;
-      device_param->kernel_params_mp_l[1] = &device_param->d_root_css_buf;
-      device_param->kernel_params_mp_l[2] = &device_param->d_markov_css_buf;
+                                                  // ? &device_param->opencl_d_pws_buf
+                                                  // : &device_param->opencl_d_pws_amp_buf;
+      device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf;
+      device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf;
       device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3];
       device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4];
       device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5];
@@ -7154,9 +7154,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_mp_r_buf32[7] = 0;
       device_param->kernel_params_mp_r_buf64[8] = 0;
 
-      device_param->kernel_params_mp_r[0] = &device_param->d_bfs;
-      device_param->kernel_params_mp_r[1] = &device_param->d_root_css_buf;
-      device_param->kernel_params_mp_r[2] = &device_param->d_markov_css_buf;
+      device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs;
+      device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf;
+      device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf;
       device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3];
       device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4];
       device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5];
@@ -7167,16 +7167,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_amp_buf32[5] = 0; // combs_mode
       device_param->kernel_params_amp_buf64[6] = 0; // gid_max
 
-      device_param->kernel_params_amp[0] = NULL; // &device_param->d_pws_buf;
-      device_param->kernel_params_amp[1] = NULL; // &device_param->d_pws_amp_buf;
-      device_param->kernel_params_amp[2] = &device_param->d_rules_c;
-      device_param->kernel_params_amp[3] = &device_param->d_combs_c;
-      device_param->kernel_params_amp[4] = &device_param->d_bfs_c;
+      device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf;
+      device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf;
+      device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c;
+      device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c;
+      device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c;
       device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5];
       device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6];
 
-      device_param->kernel_params_tm[0] = &device_param->d_bfs_c;
-      device_param->kernel_params_tm[1] = &device_param->d_tm_c;
+      device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
+      device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c;
     }
 
     device_param->kernel_params_memset_buf32[1] = 0; // value
@@ -7193,11 +7193,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     device_param->kernel_params_decompress_buf64[3] = 0; // gid_max
 
-    device_param->kernel_params_decompress[0] = NULL; // &device_param->d_pws_idx;
-    device_param->kernel_params_decompress[1] = NULL; // &device_param->d_pws_comp_buf;
+    device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx;
+    device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf;
     device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                                      // ? &device_param->d_pws_buf
-                                                      // : &device_param->d_pws_amp_buf;
+                                                      // ? &device_param->opencl_d_pws_buf
+                                                      // : &device_param->opencl_d_pws_amp_buf;
     device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3];
 
     /**
@@ -7216,19 +7216,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_wgs1);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_local_mem_size1);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_preferred_wgs_multiple1);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
 
           if (CL_rc == -1) return -1;
 
@@ -7236,19 +7236,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_wgs2);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_local_mem_size2);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_preferred_wgs_multiple2);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
 
           if (CL_rc == -1) return -1;
 
@@ -7256,19 +7256,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_wgs3);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_local_mem_size3);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_preferred_wgs_multiple3);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
 
           if (CL_rc == -1) return -1;
         }
@@ -7276,19 +7276,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_wgs4);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_local_mem_size4);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_preferred_wgs_multiple4);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
 
           if (CL_rc == -1) return -1;
         }
@@ -7301,19 +7301,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_wgs1);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_local_mem_size1);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_preferred_wgs_multiple1);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
 
           if (CL_rc == -1) return -1;
 
@@ -7321,19 +7321,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_wgs2);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_local_mem_size2);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_preferred_wgs_multiple2);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
 
           if (CL_rc == -1) return -1;
 
@@ -7341,19 +7341,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_wgs3);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_local_mem_size3);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_preferred_wgs_multiple3);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
 
           if (CL_rc == -1) return -1;
         }
@@ -7361,19 +7361,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_wgs4);
+          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_local_mem_size4);
+          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_preferred_wgs_multiple4);
+          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
 
           if (CL_rc == -1) return -1;
         }
@@ -7390,19 +7390,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
 
-            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_tm);
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_tm);
 
             if (CL_rc == -1) return -1;
 
-            CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_tm, &device_param->kernel_wgs_tm);
+            CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_wgs_tm);
 
             if (CL_rc == -1) return -1;
 
-            CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_tm, &device_param->kernel_local_mem_size_tm);
+            CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_local_mem_size_tm);
 
             if (CL_rc == -1) return -1;
 
-            CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_tm, &device_param->kernel_preferred_wgs_multiple_tm);
+            CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_preferred_wgs_multiple_tm);
 
             if (CL_rc == -1) return -1;
           }
@@ -7415,19 +7415,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type);
 
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_wgs1);
+      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_local_mem_size1);
+      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_preferred_wgs_multiple1);
+      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
 
       if (CL_rc == -1) return -1;
 
@@ -7435,19 +7435,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type);
 
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_wgs2);
+      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_local_mem_size2);
+      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_preferred_wgs_multiple2);
+      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
 
       if (CL_rc == -1) return -1;
 
@@ -7455,19 +7455,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type);
 
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_wgs3);
+      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_local_mem_size3);
+      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_preferred_wgs_multiple3);
+      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
 
       if (CL_rc == -1) return -1;
 
@@ -7477,19 +7477,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel12);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel12);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel12, &device_param->kernel_wgs12);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_wgs12);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel12, &device_param->kernel_local_mem_size12);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_local_mem_size12);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel12, &device_param->kernel_preferred_wgs_multiple12);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_preferred_wgs_multiple12);
 
         if (CL_rc == -1) return -1;
       }
@@ -7500,19 +7500,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel23);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel23);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel23, &device_param->kernel_wgs23);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_wgs23);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel23, &device_param->kernel_local_mem_size23);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_local_mem_size23);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel23, &device_param->kernel_preferred_wgs_multiple23);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_preferred_wgs_multiple23);
 
         if (CL_rc == -1) return -1;
       }
@@ -7523,19 +7523,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_init2);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_init2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_init2, &device_param->kernel_wgs_init2);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_wgs_init2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_init2, &device_param->kernel_local_mem_size_init2);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_local_mem_size_init2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2);
 
         if (CL_rc == -1) return -1;
       }
@@ -7546,19 +7546,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_loop2);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_loop2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_loop2, &device_param->kernel_wgs_loop2);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_wgs_loop2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_loop2, &device_param->kernel_local_mem_size_loop2);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_local_mem_size_loop2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_loop2, &device_param->kernel_preferred_wgs_multiple_loop2);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_preferred_wgs_multiple_loop2);
 
         if (CL_rc == -1) return -1;
       }
@@ -7569,19 +7569,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_aux1);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux1);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_aux1, &device_param->kernel_wgs_aux1);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_wgs_aux1);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_aux1, &device_param->kernel_local_mem_size_aux1);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_local_mem_size_aux1);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_aux1, &device_param->kernel_preferred_wgs_multiple_aux1);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_preferred_wgs_multiple_aux1);
 
         if (CL_rc == -1) return -1;
       }
@@ -7592,19 +7592,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_aux2);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_aux2, &device_param->kernel_wgs_aux2);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_wgs_aux2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_aux2, &device_param->kernel_local_mem_size_aux2);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_local_mem_size_aux2);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_aux2, &device_param->kernel_preferred_wgs_multiple_aux2);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_preferred_wgs_multiple_aux2);
 
         if (CL_rc == -1) return -1;
       }
@@ -7615,19 +7615,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_aux3);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux3);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_aux3, &device_param->kernel_wgs_aux3);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_wgs_aux3);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_aux3, &device_param->kernel_local_mem_size_aux3);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_local_mem_size_aux3);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_aux3, &device_param->kernel_preferred_wgs_multiple_aux3);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_preferred_wgs_multiple_aux3);
 
         if (CL_rc == -1) return -1;
       }
@@ -7638,19 +7638,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type);
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel_aux4);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux4);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_aux4, &device_param->kernel_wgs_aux4);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_wgs_aux4);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_aux4, &device_param->kernel_local_mem_size_aux4);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_local_mem_size_aux4);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_aux4, &device_param->kernel_preferred_wgs_multiple_aux4);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_preferred_wgs_multiple_aux4);
 
         if (CL_rc == -1) return -1;
       }
@@ -7658,69 +7658,69 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // GPU memset
 
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_memset", &device_param->kernel_memset);
+    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_memset", &device_param->opencl_kernel_memset);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_memset, &device_param->kernel_wgs_memset);
+    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_memset, &device_param->kernel_local_mem_size_memset);
+    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset);
+    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
 
     // GPU autotune init
 
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_atinit", &device_param->kernel_atinit);
+    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_atinit", &device_param->opencl_kernel_atinit);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_atinit, &device_param->kernel_wgs_atinit);
+    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_atinit, &device_param->kernel_local_mem_size_atinit);
+    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit);
+    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
 
     // GPU decompress
 
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_decompress", &device_param->kernel_decompress);
+    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_decompress", &device_param->opencl_kernel_decompress);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_decompress, &device_param->kernel_wgs_decompress);
+    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_decompress, &device_param->kernel_local_mem_size_decompress);
+    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress);
+    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress);
 
     if (CL_rc == -1) return -1;
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
 
     // MP start
 
@@ -7733,79 +7733,79 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         // mp_l
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "l_markov", &device_param->kernel_mp_l);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "l_markov", &device_param->opencl_kernel_mp_l);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_mp_l, &device_param->kernel_wgs_mp_l);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_wgs_mp_l);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_mp_l, &device_param->kernel_local_mem_size_mp_l);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_local_mem_size_mp_l);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l);
 
         if (CL_rc == -1) return -1;
 
         // mp_r
 
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "r_markov", &device_param->kernel_mp_r);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "r_markov", &device_param->opencl_kernel_mp_r);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_mp_r, &device_param->kernel_wgs_mp_r);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_wgs_mp_r);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_mp_r, &device_param->kernel_local_mem_size_mp_r);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_local_mem_size_mp_r);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r);
 
         if (CL_rc == -1) return -1;
 
         if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
         {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
         }
       }
       else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
       {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "C_markov", &device_param->kernel_mp);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_wgs_mp);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_local_mem_size_mp);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
 
         if (CL_rc == -1) return -1;
       }
       else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
       {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "C_markov", &device_param->kernel_mp);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_wgs_mp);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_local_mem_size_mp);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
 
         if (CL_rc == -1) return -1;
       }
@@ -7822,19 +7822,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
       else
       {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_amp, "amp", &device_param->kernel_amp);
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_amp, "amp", &device_param->opencl_kernel_amp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->kernel_amp, &device_param->kernel_wgs_amp);
+        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_wgs_amp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->kernel_amp, &device_param->kernel_local_mem_size_amp);
+        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_local_mem_size_amp);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->kernel_amp, &device_param->kernel_preferred_wgs_multiple_amp);
+        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_preferred_wgs_multiple_amp);
 
         if (CL_rc == -1) return -1;
       }
@@ -7847,21 +7847,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         for (u32 i = 0; i < 5; i++)
         {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
 
           if (CL_rc == -1) return -1;
         }
 
         for (u32 i = 5; i < 6; i++)
         {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
 
           if (CL_rc == -1) return -1;
         }
 
         for (u32 i = 6; i < 7; i++)
         {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
 
           if (CL_rc == -1) return -1;
         }
@@ -7870,9 +7870,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // zero some data buffers
 
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
 
     /**
      * special buffers
@@ -7880,28 +7880,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (user_options->slow_candidates == true)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
     }
     else
     {
       if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
       {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
       {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_combs,          size_combs);       if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs,          size_combs);       if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
       {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
       }
     }
 
@@ -7935,7 +7935,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           device_param->kernel_params_mp_buf32[7] = 0;
         }
 
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
       }
       else if (user_options->attack_mode == ATTACK_MODE_BF)
       {
@@ -7953,8 +7953,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
         if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
 
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
       }
     }
 
@@ -8157,19 +8157,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     device_param->size_brain_link_out = size_brain_link_out;
     #endif
 
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->d_pws_buf);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->d_pws_amp_buf);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->d_pws_comp_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->d_pws_idx);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->d_tmps);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->d_hooks);        if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->opencl_d_pws_buf);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->opencl_d_pws_amp_buf);  if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->opencl_d_pws_comp_buf); if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->opencl_d_pws_idx);      if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->opencl_d_tmps);         if (CL_rc == -1) return -1;
+    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->opencl_d_hooks);        if (CL_rc == -1) return -1;
 
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
 
     /**
      * main host data
@@ -8218,9 +8218,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * kernel args
      */
 
-    device_param->kernel_params[ 0] = &device_param->d_pws_buf;
-    device_param->kernel_params[ 4] = &device_param->d_tmps;
-    device_param->kernel_params[ 5] = &device_param->d_hooks;
+    device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf;
+    device_param->kernel_params[ 4] = &device_param->opencl_d_tmps;
+    device_param->kernel_params[ 5] = &device_param->opencl_d_hooks;
 
     if (user_options->slow_candidates == true)
     {
@@ -8236,20 +8236,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
         {
           device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                            ? &device_param->d_pws_buf
-                                            : &device_param->d_pws_amp_buf;
+                                            ? &device_param->opencl_d_pws_buf
+                                            : &device_param->opencl_d_pws_amp_buf;
 
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
         }
       }
 
       if (user_options->attack_mode == ATTACK_MODE_BF)
       {
         device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                            ? &device_param->d_pws_buf
-                                            : &device_param->d_pws_amp_buf;
+                                            ? &device_param->opencl_d_pws_buf
+                                            : &device_param->opencl_d_pws_amp_buf;
 
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
+        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
       }
 
       if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -8258,23 +8258,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
       else
       {
-        device_param->kernel_params_amp[0] = &device_param->d_pws_buf;
-        device_param->kernel_params_amp[1] = &device_param->d_pws_amp_buf;
+        device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf;
+        device_param->kernel_params_amp[1] = &device_param->opencl_d_pws_amp_buf;
 
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
+        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
+        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
       }
     }
 
-    device_param->kernel_params_decompress[0] = &device_param->d_pws_idx;
-    device_param->kernel_params_decompress[1] = &device_param->d_pws_comp_buf;
+    device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx;
+    device_param->kernel_params_decompress[1] = &device_param->opencl_d_pws_comp_buf;
     device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                              ? &device_param->d_pws_buf
-                                              : &device_param->d_pws_amp_buf;
+                                              ? &device_param->opencl_d_pws_buf
+                                              : &device_param->opencl_d_pws_amp_buf;
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
 
     hardware_power_all += device_param->hardware_power;
 
@@ -8317,67 +8317,67 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_opencl == true)
     {
-      if (device_param->d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_buf);
-      if (device_param->d_pws_amp_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_amp_buf);
-      if (device_param->d_pws_comp_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_comp_buf);
-      if (device_param->d_pws_idx)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_idx);
-      if (device_param->d_rules)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules);
-      if (device_param->d_rules_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules_c);
-      if (device_param->d_combs)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs);
-      if (device_param->d_combs_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs_c);
-      if (device_param->d_bfs)            hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs);
-      if (device_param->d_bfs_c)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_bfs_c);
-      if (device_param->d_bitmap_s1_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_a);
-      if (device_param->d_bitmap_s1_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_b);
-      if (device_param->d_bitmap_s1_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_c);
-      if (device_param->d_bitmap_s1_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s1_d);
-      if (device_param->d_bitmap_s2_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_a);
-      if (device_param->d_bitmap_s2_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_b);
-      if (device_param->d_bitmap_s2_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_c);
-      if (device_param->d_bitmap_s2_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_bitmap_s2_d);
-      if (device_param->d_plain_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_plain_bufs);
-      if (device_param->d_digests_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_buf);
-      if (device_param->d_digests_shown)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_digests_shown);
-      if (device_param->d_salt_bufs)      hc_clReleaseMemObject (hashcat_ctx, device_param->d_salt_bufs);
-      if (device_param->d_esalt_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_esalt_bufs);
-      if (device_param->d_tmps)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tmps);
-      if (device_param->d_hooks)          hc_clReleaseMemObject (hashcat_ctx, device_param->d_hooks);
-      if (device_param->d_result)         hc_clReleaseMemObject (hashcat_ctx, device_param->d_result);
-      if (device_param->d_extra0_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra0_buf);
-      if (device_param->d_extra1_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra1_buf);
-      if (device_param->d_extra2_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra2_buf);
-      if (device_param->d_extra3_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->d_extra3_buf);
-      if (device_param->d_root_css_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_root_css_buf);
-      if (device_param->d_markov_css_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_markov_css_buf);
-      if (device_param->d_tm_c)           hc_clReleaseMemObject (hashcat_ctx, device_param->d_tm_c);
-      if (device_param->d_st_digests_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_digests_buf);
-      if (device_param->d_st_salts_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_salts_buf);
-      if (device_param->d_st_esalts_buf)  hc_clReleaseMemObject (hashcat_ctx, device_param->d_st_esalts_buf);
+      if (device_param->opencl_d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf);
+      if (device_param->opencl_d_pws_amp_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_amp_buf);
+      if (device_param->opencl_d_pws_comp_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_comp_buf);
+      if (device_param->opencl_d_pws_idx)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_idx);
+      if (device_param->opencl_d_rules)          hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_rules);
+      if (device_param->opencl_d_rules_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_rules_c);
+      if (device_param->opencl_d_combs)          hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_combs);
+      if (device_param->opencl_d_combs_c)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_combs_c);
+      if (device_param->opencl_d_bfs)            hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bfs);
+      if (device_param->opencl_d_bfs_c)          hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bfs_c);
+      if (device_param->opencl_d_bitmap_s1_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s1_a);
+      if (device_param->opencl_d_bitmap_s1_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s1_b);
+      if (device_param->opencl_d_bitmap_s1_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s1_c);
+      if (device_param->opencl_d_bitmap_s1_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s1_d);
+      if (device_param->opencl_d_bitmap_s2_a)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s2_a);
+      if (device_param->opencl_d_bitmap_s2_b)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s2_b);
+      if (device_param->opencl_d_bitmap_s2_c)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s2_c);
+      if (device_param->opencl_d_bitmap_s2_d)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_bitmap_s2_d);
+      if (device_param->opencl_d_plain_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_plain_bufs);
+      if (device_param->opencl_d_digests_buf)    hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_digests_buf);
+      if (device_param->opencl_d_digests_shown)  hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_digests_shown);
+      if (device_param->opencl_d_salt_bufs)      hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_salt_bufs);
+      if (device_param->opencl_d_esalt_bufs)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_esalt_bufs);
+      if (device_param->opencl_d_tmps)           hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_tmps);
+      if (device_param->opencl_d_hooks)          hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_hooks);
+      if (device_param->opencl_d_result)         hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_result);
+      if (device_param->opencl_d_extra0_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_extra0_buf);
+      if (device_param->opencl_d_extra1_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_extra1_buf);
+      if (device_param->opencl_d_extra2_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_extra2_buf);
+      if (device_param->opencl_d_extra3_buf)     hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_extra3_buf);
+      if (device_param->opencl_d_root_css_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_root_css_buf);
+      if (device_param->opencl_d_markov_css_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_markov_css_buf);
+      if (device_param->opencl_d_tm_c)           hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_tm_c);
+      if (device_param->opencl_d_st_digests_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_st_digests_buf);
+      if (device_param->opencl_d_st_salts_buf)   hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_st_salts_buf);
+      if (device_param->opencl_d_st_esalts_buf)  hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_st_esalts_buf);
 
-      if (device_param->kernel1)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel1);
-      if (device_param->kernel12)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel12);
-      if (device_param->kernel2)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel2);
-      if (device_param->kernel23)         hc_clReleaseKernel (hashcat_ctx, device_param->kernel23);
-      if (device_param->kernel3)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel3);
-      if (device_param->kernel4)          hc_clReleaseKernel (hashcat_ctx, device_param->kernel4);
-      if (device_param->kernel_init2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_init2);
-      if (device_param->kernel_loop2)     hc_clReleaseKernel (hashcat_ctx, device_param->kernel_loop2);
-      if (device_param->kernel_mp)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp);
-      if (device_param->kernel_mp_l)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_l);
-      if (device_param->kernel_mp_r)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_mp_r);
-      if (device_param->kernel_tm)        hc_clReleaseKernel (hashcat_ctx, device_param->kernel_tm);
-      if (device_param->kernel_amp)       hc_clReleaseKernel (hashcat_ctx, device_param->kernel_amp);
-      if (device_param->kernel_memset)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_memset);
-      if (device_param->kernel_atinit)    hc_clReleaseKernel (hashcat_ctx, device_param->kernel_atinit);
-      if (device_param->kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->kernel_decompress);
-      if (device_param->kernel_aux1)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux1);
-      if (device_param->kernel_aux2)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux2);
-      if (device_param->kernel_aux3)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux3);
-      if (device_param->kernel_aux4)      hc_clReleaseKernel (hashcat_ctx, device_param->kernel_aux4);
+      if (device_param->opencl_kernel1)          hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel1);
+      if (device_param->opencl_kernel12)         hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel12);
+      if (device_param->opencl_kernel2)          hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2);
+      if (device_param->opencl_kernel23)         hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel23);
+      if (device_param->opencl_kernel3)          hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel3);
+      if (device_param->opencl_kernel4)          hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel4);
+      if (device_param->opencl_kernel_init2)     hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_init2);
+      if (device_param->opencl_kernel_loop2)     hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_loop2);
+      if (device_param->opencl_kernel_mp)        hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp);
+      if (device_param->opencl_kernel_mp_l)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp_l);
+      if (device_param->opencl_kernel_mp_r)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp_r);
+      if (device_param->opencl_kernel_tm)        hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_tm);
+      if (device_param->opencl_kernel_amp)       hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_amp);
+      if (device_param->opencl_kernel_memset)    hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_memset);
+      if (device_param->opencl_kernel_atinit)    hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_atinit);
+      if (device_param->opencl_kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_decompress);
+      if (device_param->opencl_kernel_aux1)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux1);
+      if (device_param->opencl_kernel_aux2)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux2);
+      if (device_param->opencl_kernel_aux3)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux3);
+      if (device_param->opencl_kernel_aux4)      hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_aux4);
 
-      if (device_param->program)          hc_clReleaseProgram (hashcat_ctx, device_param->program);
-      if (device_param->program_mp)       hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
-      if (device_param->program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->program_amp);
+      if (device_param->opencl_program)          hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program);
+      if (device_param->opencl_program_mp)       hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_mp);
+      if (device_param->opencl_program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_amp);
 
       if (device_param->opencl_command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
 
@@ -8396,65 +8396,65 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     device_param->brain_link_out_buf  = NULL;
     #endif
 
-    device_param->d_pws_buf           = NULL;
-    device_param->d_pws_amp_buf       = NULL;
-    device_param->d_pws_comp_buf      = NULL;
-    device_param->d_pws_idx           = NULL;
-    device_param->d_rules             = NULL;
-    device_param->d_rules_c           = NULL;
-    device_param->d_combs             = NULL;
-    device_param->d_combs_c           = NULL;
-    device_param->d_bfs               = NULL;
-    device_param->d_bfs_c             = NULL;
-    device_param->d_bitmap_s1_a       = NULL;
-    device_param->d_bitmap_s1_b       = NULL;
-    device_param->d_bitmap_s1_c       = NULL;
-    device_param->d_bitmap_s1_d       = NULL;
-    device_param->d_bitmap_s2_a       = NULL;
-    device_param->d_bitmap_s2_b       = NULL;
-    device_param->d_bitmap_s2_c       = NULL;
-    device_param->d_bitmap_s2_d       = NULL;
-    device_param->d_plain_bufs        = NULL;
-    device_param->d_digests_buf       = NULL;
-    device_param->d_digests_shown     = NULL;
-    device_param->d_salt_bufs         = NULL;
-    device_param->d_esalt_bufs        = NULL;
-    device_param->d_tmps              = NULL;
-    device_param->d_hooks             = NULL;
-    device_param->d_result            = NULL;
-    device_param->d_extra0_buf        = NULL;
-    device_param->d_extra1_buf        = NULL;
-    device_param->d_extra2_buf        = NULL;
-    device_param->d_extra3_buf        = NULL;
-    device_param->d_root_css_buf      = NULL;
-    device_param->d_markov_css_buf    = NULL;
-    device_param->d_tm_c              = NULL;
-    device_param->d_st_digests_buf    = NULL;
-    device_param->d_st_salts_buf      = NULL;
-    device_param->d_st_esalts_buf     = NULL;
-    device_param->kernel1             = NULL;
-    device_param->kernel12            = NULL;
-    device_param->kernel2             = NULL;
-    device_param->kernel23            = NULL;
-    device_param->kernel3             = NULL;
-    device_param->kernel4             = NULL;
-    device_param->kernel_init2        = NULL;
-    device_param->kernel_loop2        = NULL;
-    device_param->kernel_mp           = NULL;
-    device_param->kernel_mp_l         = NULL;
-    device_param->kernel_mp_r         = NULL;
-    device_param->kernel_tm           = NULL;
-    device_param->kernel_amp          = NULL;
-    device_param->kernel_memset       = NULL;
-    device_param->kernel_atinit       = NULL;
-    device_param->kernel_decompress   = NULL;
-    device_param->kernel_aux1         = NULL;
-    device_param->kernel_aux2         = NULL;
-    device_param->kernel_aux3         = NULL;
-    device_param->kernel_aux4         = NULL;
-    device_param->program             = NULL;
-    device_param->program_mp          = NULL;
-    device_param->program_amp         = NULL;
+    device_param->opencl_d_pws_buf           = NULL;
+    device_param->opencl_d_pws_amp_buf       = NULL;
+    device_param->opencl_d_pws_comp_buf      = NULL;
+    device_param->opencl_d_pws_idx           = NULL;
+    device_param->opencl_d_rules             = NULL;
+    device_param->opencl_d_rules_c           = NULL;
+    device_param->opencl_d_combs             = NULL;
+    device_param->opencl_d_combs_c           = NULL;
+    device_param->opencl_d_bfs               = NULL;
+    device_param->opencl_d_bfs_c             = NULL;
+    device_param->opencl_d_bitmap_s1_a       = NULL;
+    device_param->opencl_d_bitmap_s1_b       = NULL;
+    device_param->opencl_d_bitmap_s1_c       = NULL;
+    device_param->opencl_d_bitmap_s1_d       = NULL;
+    device_param->opencl_d_bitmap_s2_a       = NULL;
+    device_param->opencl_d_bitmap_s2_b       = NULL;
+    device_param->opencl_d_bitmap_s2_c       = NULL;
+    device_param->opencl_d_bitmap_s2_d       = NULL;
+    device_param->opencl_d_plain_bufs        = NULL;
+    device_param->opencl_d_digests_buf       = NULL;
+    device_param->opencl_d_digests_shown     = NULL;
+    device_param->opencl_d_salt_bufs         = NULL;
+    device_param->opencl_d_esalt_bufs        = NULL;
+    device_param->opencl_d_tmps              = NULL;
+    device_param->opencl_d_hooks             = NULL;
+    device_param->opencl_d_result            = NULL;
+    device_param->opencl_d_extra0_buf        = NULL;
+    device_param->opencl_d_extra1_buf        = NULL;
+    device_param->opencl_d_extra2_buf        = NULL;
+    device_param->opencl_d_extra3_buf        = NULL;
+    device_param->opencl_d_root_css_buf      = NULL;
+    device_param->opencl_d_markov_css_buf    = NULL;
+    device_param->opencl_d_tm_c              = NULL;
+    device_param->opencl_d_st_digests_buf    = NULL;
+    device_param->opencl_d_st_salts_buf      = NULL;
+    device_param->opencl_d_st_esalts_buf     = NULL;
+    device_param->opencl_kernel1             = NULL;
+    device_param->opencl_kernel12            = NULL;
+    device_param->opencl_kernel2             = NULL;
+    device_param->opencl_kernel23            = NULL;
+    device_param->opencl_kernel3             = NULL;
+    device_param->opencl_kernel4             = NULL;
+    device_param->opencl_kernel_init2        = NULL;
+    device_param->opencl_kernel_loop2        = NULL;
+    device_param->opencl_kernel_mp           = NULL;
+    device_param->opencl_kernel_mp_l         = NULL;
+    device_param->opencl_kernel_mp_r         = NULL;
+    device_param->opencl_kernel_tm           = NULL;
+    device_param->opencl_kernel_amp          = NULL;
+    device_param->opencl_kernel_memset       = NULL;
+    device_param->opencl_kernel_atinit       = NULL;
+    device_param->opencl_kernel_decompress   = NULL;
+    device_param->opencl_kernel_aux1         = NULL;
+    device_param->opencl_kernel_aux2         = NULL;
+    device_param->opencl_kernel_aux3         = NULL;
+    device_param->opencl_kernel_aux4         = NULL;
+    device_param->opencl_program             = NULL;
+    device_param->opencl_program_mp          = NULL;
+    device_param->opencl_program_amp         = NULL;
     device_param->opencl_command_queue       = NULL;
     device_param->opencl_context             = NULL;
   }
@@ -8536,15 +8536,15 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
     {
       int CL_rc;
 
-      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel1, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel3, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
-      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel4, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel1, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel3, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel4, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
 
-      if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel12,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-      if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel23,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-      if (hashconfig->opts_type & OPTS_TYPE_INIT2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_init2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
-      if (hashconfig->opts_type & OPTS_TYPE_LOOP2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_loop2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel12,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel23,     33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_INIT2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_init2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
+      if (hashconfig->opts_type & OPTS_TYPE_LOOP2)  { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_loop2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1; }
     }
     */
 
@@ -8563,7 +8563,7 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
         {
           int CL_rc;
 
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
 
           if (CL_rc == -1) return -1;
         }
@@ -8599,11 +8599,11 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
     {
       int CL_rc = CL_SUCCESS;
 
-      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
-      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
     }
   }
 
@@ -8639,16 +8639,16 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
     {
       int CL_rc = CL_SUCCESS;
 
-      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-      for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
 
-      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
-      for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
-      for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf,   CL_TRUE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
     }
   }
 
diff --git a/src/hashes.c b/src/hashes.c
index 2cea080b9..b43a66b26 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -309,7 +309,7 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
   {
     tmps = hcmalloc (hashconfig->tmp_size);
 
-    hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
+    hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
   }
 
   // hash
@@ -462,7 +462,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   cl_int CL_err;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
   if (CL_err != CL_SUCCESS)
   {
@@ -483,7 +483,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     plain_t *cracked = (plain_t *) hccalloc (num_cracked, sizeof (plain_t));
 
-    CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
+    CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS)
     {
@@ -553,7 +553,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
       memset (hashes->digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (u32));
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
+      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
 
       if (CL_err != CL_SUCCESS)
       {
@@ -565,7 +565,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     num_cracked = 0;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS)
     {
diff --git a/src/selftest.c b/src/selftest.c
index 2f0535501..befef1988 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -29,9 +29,9 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // init : replace hashes with selftest hash
 
-  device_param->kernel_params[15] = &device_param->d_st_digests_buf;
-  device_param->kernel_params[17] = &device_param->d_st_salts_buf;
-  device_param->kernel_params[18] = &device_param->d_st_esalts_buf;
+  device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf;
+  device_param->kernel_params[17] = &device_param->opencl_d_st_salts_buf;
+  device_param->kernel_params[18] = &device_param->opencl_d_st_esalts_buf;
 
   device_param->kernel_params_buf32[31] = 1;
   device_param->kernel_params_buf32[32] = 0;
@@ -57,7 +57,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     pw.pw_len = (u32) pw_len;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
     if (CL_err != CL_SUCCESS) return -1;
   }
@@ -84,7 +84,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           uppercase ((u8 *) pw_ptr, pw.pw_len);
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
       }
@@ -136,11 +136,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           comb_ptr[comb.pw_len] = 0x80;
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
         if (CL_err != CL_SUCCESS) return -1;
       }
@@ -165,7 +165,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
           pw.pw_len = (u32) pw_len;
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
         }
@@ -208,7 +208,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             bf.i = byte_swap_32 (bf.i);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
 
@@ -296,7 +296,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
           if (CL_err != CL_SUCCESS) return -1;
 
@@ -316,7 +316,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       pw.pw_len = (u32) pw_len;
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
 
       if (CL_err != CL_SUCCESS) return -1;
     }
@@ -372,13 +372,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
       module_ctx->module_hook12 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -411,13 +411,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
 
       module_ctx->module_hook23 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
     }
@@ -492,7 +492,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   u32 num_cracked;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
 
   if (CL_err != CL_SUCCESS) return -1;
 
@@ -507,20 +507,20 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   device_param->kernel_params_buf32[33] = 0;
   device_param->kernel_params_buf64[34] = 0;
 
-  device_param->kernel_params[15] = &device_param->d_digests_buf;
-  device_param->kernel_params[17] = &device_param->d_salt_bufs;
-  device_param->kernel_params[18] = &device_param->d_esalt_bufs;
+  device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
+  device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
+  device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
 
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
 
   if (user_options->slow_candidates == true)
   {
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, device_param->size_rules_c);
+    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
 
     if (CL_rc == -1) return -1;
   }
@@ -528,19 +528,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   {
     if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, device_param->size_rules_c);
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
 
       if (CL_rc == -1) return -1;
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_combs_c, device_param->size_combs);
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs);
 
       if (CL_rc == -1) return -1;
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_bfs_c, device_param->size_bfs);
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs);
 
       if (CL_rc == -1) return -1;
     }

From 503304f36aa49b17e55342725a4345301c3dc77e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 3 May 2019 12:07:06 +0200
Subject: [PATCH 23/73] Add some first CUDA device memory allocations and host
 buffer copies

---
 include/types.h |  45 +++
 src/backend.c   | 711 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 556 insertions(+), 200 deletions(-)

diff --git a/include/types.h b/include/types.h
index d38d20191..747b78369 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1245,9 +1245,54 @@ typedef struct hc_device_param
 
   bool              is_cuda;
 
+  int               cuda_warp_size;
+
   CUdevice          cuda_device;
   CUcontext         cuda_context;
 
+  CUmodule          cuda_module;
+  CUmodule          cuda_module_mp;
+  CUmodule          cuda_module_amp;
+
+  CUdeviceptr       cuda_d_pws_buf;
+  CUdeviceptr       cuda_d_pws_amp_buf;
+  CUdeviceptr       cuda_d_pws_comp_buf;
+  CUdeviceptr       cuda_d_pws_idx;
+  CUdeviceptr       cuda_d_words_buf_l;
+  CUdeviceptr       cuda_d_words_buf_r;
+  CUdeviceptr       cuda_d_rules;
+  CUdeviceptr       cuda_d_rules_c;
+  CUdeviceptr       cuda_d_combs;
+  CUdeviceptr       cuda_d_combs_c;
+  CUdeviceptr       cuda_d_bfs;
+  CUdeviceptr       cuda_d_bfs_c;
+  CUdeviceptr       cuda_d_tm_c;
+  CUdeviceptr       cuda_d_bitmap_s1_a;
+  CUdeviceptr       cuda_d_bitmap_s1_b;
+  CUdeviceptr       cuda_d_bitmap_s1_c;
+  CUdeviceptr       cuda_d_bitmap_s1_d;
+  CUdeviceptr       cuda_d_bitmap_s2_a;
+  CUdeviceptr       cuda_d_bitmap_s2_b;
+  CUdeviceptr       cuda_d_bitmap_s2_c;
+  CUdeviceptr       cuda_d_bitmap_s2_d;
+  CUdeviceptr       cuda_d_plain_bufs;
+  CUdeviceptr       cuda_d_digests_buf;
+  CUdeviceptr       cuda_d_digests_shown;
+  CUdeviceptr       cuda_d_salt_bufs;
+  CUdeviceptr       cuda_d_esalt_bufs;
+  CUdeviceptr       cuda_d_tmps;
+  CUdeviceptr       cuda_d_hooks;
+  CUdeviceptr       cuda_d_result;
+  CUdeviceptr       cuda_d_extra0_buf;
+  CUdeviceptr       cuda_d_extra1_buf;
+  CUdeviceptr       cuda_d_extra2_buf;
+  CUdeviceptr       cuda_d_extra3_buf;
+  CUdeviceptr       cuda_d_root_css_buf;
+  CUdeviceptr       cuda_d_markov_css_buf;
+  CUdeviceptr       cuda_d_st_digests_buf;
+  CUdeviceptr       cuda_d_st_salts_buf;
+  CUdeviceptr       cuda_d_st_esalts_buf;
+
   // API: opencl
 
   bool              is_opencl;
diff --git a/src/backend.c b/src/backend.c
index dca1e6e7b..9e86c5b6a 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -4249,6 +4249,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   int cuda_devices_cnt    = 0;
   int cuda_devices_active = 0;
 
+  int CL_rc;
+  int CU_rc;
+
   if (backend_ctx->cuda)
   {
     // device count
@@ -4276,8 +4279,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       CUdevice cuda_device;
 
-      int CU_rc;
-
       CU_rc = hc_cuDeviceGet (hashcat_ctx, &cuda_device, cuda_devices_idx);
 
       if (CU_rc == -1) return -1;
@@ -4324,6 +4325,16 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->device_available_mem = 0;
 
+      // warp size
+
+      int cuda_warp_size = 0;
+
+      CU_rc = hc_cuDeviceGetAttribute (hashcat_ctx, &cuda_warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, cuda_device);
+
+      if (CU_rc == -1) return -1;
+
+      device_param->cuda_warp_size = cuda_warp_size;
+
       // sm_minor, sm_major
 
       int sm_major = 0;
@@ -4639,8 +4650,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         size_t param_value_size = 0;
 
-        int CL_rc;
-
         // opencl_device_type
 
         cl_device_type opencl_device_type;
@@ -5874,6 +5883,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
   u32 hardware_power_all = 0;
 
+  int CU_rc;
+  int CL_rc;
+
   for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
     /**
@@ -5941,7 +5953,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            const int CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -5957,7 +5969,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            const int CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
+            CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL);
 
             if (CL_rc == -1) return -1;
           }
@@ -6132,7 +6144,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
-      int CU_rc = hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
+      CU_rc = hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
 
       if (CU_rc == -1) return -1;
     }
@@ -6149,7 +6161,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
       */
 
-      int CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
+      CL_rc = hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
 
       if (CL_rc == -1) return -1;
 
@@ -6443,7 +6455,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        if (device_param->is_cuda)
+        if (device_param->is_cuda == true)
         {
           nvrtcProgram program;
 
@@ -6497,35 +6509,41 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           hcfree (nvrtc_options);
           hcfree (nvrtc_options_string);
 
-          if (cache_disable == false)
-          {
-            size_t binary_size;
+          size_t binary_size;
 
-            const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
+          const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
 
-            if (rc_nvrtcGetPTXSize == -1) return -1;
+          if (rc_nvrtcGetPTXSize == -1) return -1;
 
-            char *binary = (char *) hcmalloc (binary_size);
+          char *binary = (char *) hcmalloc (binary_size);
 
-            const int nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
+          const int nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
 
-            if (nvrtcGetPTX == -1) return -1;
-
-            const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
-
-            if (rc_write == false) return -1;
-
-            hcfree (binary);
-          }
+          if (nvrtcGetPTX == -1) return -1;
 
           const int rc_nvrtcDestroyProgram = hc_nvrtcDestroyProgram (hashcat_ctx, &program);
 
           if (rc_nvrtcDestroyProgram == -1) return -1;
+
+          // tbd: check for some useful options
+
+          const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module, binary, 0, NULL, NULL);
+
+          if (rc_cuModuleLoadDataEx == -1) return -1;
+
+          if (cache_disable == false)
+          {
+            const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+            if (rc_write == false) return -1;
+          }
+
+          hcfree (binary);
         }
 
-        if (device_param->is_opencl)
+        if (device_param->is_opencl == true)
         {
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program);
+          CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program);
 
           if (CL_rc == -1) return -1;
 
@@ -6547,9 +6565,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             char *build_log = (char *) hcmalloc (build_log_size + 1);
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            const int rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
 
-            if (CL_rc_build == -1) return -1;
+            if (rc_clGetProgramBuildInfo == -1) return -1;
 
             puts (build_log);
 
@@ -6593,14 +6611,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (rc_read_kernel == false) return -1;
 
-        if (device_param->is_cuda)
+        if (device_param->is_cuda == true)
         {
+          const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module, kernel_sources[0], 0, NULL, NULL);
 
+          if (rc_cuModuleLoadDataEx == -1) return -1;
         }
 
-        if (device_param->is_opencl)
+        if (device_param->is_opencl == true)
         {
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program);
+          CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program);
 
           if (CL_rc == -1) return -1;
 
@@ -6688,63 +6708,152 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_mp);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
-
-          //if (CL_rc == -1) return -1;
-
-          size_t build_log_size = 0;
-
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
-
-          //if (CL_rc == -1) return -1;
-
-          #if defined (DEBUG)
-          if ((build_log_size > 1) || (CL_rc == -1))
-          #else
-          if (CL_rc == -1)
-          #endif
+          if (device_param->is_cuda == true)
           {
-            char *build_log = (char *) hcmalloc (build_log_size + 1);
+            nvrtcProgram program;
 
-            int CL_rc_build = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            const int rc_nvrtcCreateProgram = hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], "mp_kernel", 0, NULL, NULL);
 
-            if (CL_rc_build == -1) return -1;
+            if (rc_nvrtcCreateProgram == -1) return -1;
 
-            puts (build_log);
+            char **nvrtc_options = (char **) hccalloc (3 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
 
-            hcfree (build_log);
-          }
+            nvrtc_options[0] = "--device-as-default-execution-space";
+            nvrtc_options[1] = "--gpu-architecture";
 
-          if (CL_rc == -1)
-          {
-            device_param->skipped_warning = true;
+            hc_asprintf (&nvrtc_options[2], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            char *nvrtc_options_string = hcstrdup (build_options_buf);
 
-            continue;
-          }
+            const int num_options = 3 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 3);
+
+            const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options);
+
+            size_t build_log_size = 0;
+
+            hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
+
+            #if defined (DEBUG)
+            if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1))
+            #else
+            if (rc_nvrtcCompileProgram == -1)
+            #endif
+            {
+              char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+              const int rc_nvrtcGetProgramLog = hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log);
+
+              if (rc_nvrtcGetProgramLog == -1) return -1;
+
+              puts (build_log);
+
+              hcfree (build_log);
+            }
+
+            hcfree (nvrtc_options); // compile is done; release before the skip path below can 'continue'
+            hcfree (nvrtc_options_string);
+
+            if (rc_nvrtcCompileProgram == -1)
+            {
+              device_param->skipped_warning = true;
+
+              event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+              continue;
+            }
 
-          if (cache_disable == false)
-          {
             size_t binary_size;
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+            const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
 
-            if (CL_rc == -1) return -1;
+            if (rc_nvrtcGetPTXSize == -1) return -1;
 
             char *binary = (char *) hcmalloc (binary_size);
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+            const int nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
+
+            if (nvrtcGetPTX == -1) return -1;
+
+            const int rc_nvrtcDestroyProgram = hc_nvrtcDestroyProgram (hashcat_ctx, &program);
+
+            if (rc_nvrtcDestroyProgram == -1) return -1;
+
+            // tbd: check for some useful options
+
+            const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module_mp, binary, 0, NULL, NULL);
+
+            if (rc_cuModuleLoadDataEx == -1) return -1;
+
+            if (cache_disable == false)
+            {
+              const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+              if (rc_write == false) return -1;
+            }
+
+            hcfree (binary);
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_mp);
 
             if (CL_rc == -1) return -1;
 
-            write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+            CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
-            hcfree (binary);
+            //if (CL_rc == -1) return -1;
+
+            size_t build_log_size = 0;
+
+            hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+
+            //if (CL_rc == -1) return -1;
+
+            #if defined (DEBUG)
+            if ((build_log_size > 1) || (CL_rc == -1))
+            #else
+            if (CL_rc == -1)
+            #endif
+            {
+              char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+              const int rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_mp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+
+              if (rc_clGetProgramBuildInfo == -1) return -1;
+
+              puts (build_log);
+
+              hcfree (build_log);
+            }
+
+            if (CL_rc == -1)
+            {
+              device_param->skipped_warning = true;
+
+              event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+              continue;
+            }
+
+            if (cache_disable == false)
+            {
+              size_t binary_size;
+
+              CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+
+              if (CL_rc == -1) return -1;
+
+              char *binary = (char *) hcmalloc (binary_size);
+
+              CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_mp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+
+              if (CL_rc == -1) return -1;
+
+              write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+              hcfree (binary);
+            }
           }
         }
         else
@@ -6753,13 +6862,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_mp);
+          if (device_param->is_cuda == true)
+          {
+            const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module_mp, kernel_sources[0], 0, NULL, NULL);
 
-          if (CL_rc == -1) return -1;
+            if (rc_cuModuleLoadDataEx == -1) return -1;
+          }
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_mp);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
+
+            CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_mp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
 
         hcfree (kernel_sources[0]);
@@ -6843,63 +6962,152 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_amp);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
-
-          //if (CL_rc == -1) return -1;
-
-          size_t build_log_size = 0;
-
-          hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
-
-          //if (CL_rc == -1) return -1;
-
-          #if defined (DEBUG)
-          if ((build_log_size > 1) || (CL_rc == -1))
-          #else
-          if (CL_rc == -1)
-          #endif
+          if (device_param->is_cuda == true)
           {
-            char *build_log = (char *) hcmalloc (build_log_size + 1);
+            nvrtcProgram program;
 
-            int CL_rc_build_info = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+            const int rc_nvrtcCreateProgram = hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], "amp_kernel", 0, NULL, NULL);
 
-            if (CL_rc_build_info == -1) return -1;
+            if (rc_nvrtcCreateProgram == -1) return -1;
 
-            puts (build_log);
+            char **nvrtc_options = (char **) hccalloc (3 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
 
-            hcfree (build_log);
-          }
+            nvrtc_options[0] = "--device-as-default-execution-space";
+            nvrtc_options[1] = "--gpu-architecture";
 
-          if (CL_rc == -1)
-          {
-            device_param->skipped_warning = true;
+            hc_asprintf (&nvrtc_options[2], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
 
-            event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+            char *nvrtc_options_string = hcstrdup (build_options_buf);
 
-            continue;
-          }
+            const int num_options = 3 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 3);
+
+            const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options);
+
+            size_t build_log_size = 0;
+
+            hc_nvrtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
+
+            #if defined (DEBUG)
+            if ((build_log_size > 1) || (rc_nvrtcCompileProgram == -1))
+            #else
+            if (rc_nvrtcCompileProgram == -1)
+            #endif
+            {
+              char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+              const int rc_nvrtcGetProgramLog = hc_nvrtcGetProgramLog (hashcat_ctx, program, build_log);
+
+              if (rc_nvrtcGetProgramLog == -1) return -1;
+
+              puts (build_log);
+
+              hcfree (build_log);
+            }
+
+            hcfree (nvrtc_options); // compile is done; release before the skip path below can 'continue'
+            hcfree (nvrtc_options_string);
+
+            if (rc_nvrtcCompileProgram == -1)
+            {
+              device_param->skipped_warning = true;
+
+              event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+              continue;
+            }
 
-          if (cache_disable == false)
-          {
             size_t binary_size;
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+            const int rc_nvrtcGetPTXSize = hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size);
 
-            if (CL_rc == -1) return -1;
+            if (rc_nvrtcGetPTXSize == -1) return -1;
 
             char *binary = (char *) hcmalloc (binary_size);
 
-            CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+            const int nvrtcGetPTX = hc_nvrtcGetPTX (hashcat_ctx, program, binary);
+
+            if (nvrtcGetPTX == -1) return -1;
+
+            const int rc_nvrtcDestroyProgram = hc_nvrtcDestroyProgram (hashcat_ctx, &program);
+
+            if (rc_nvrtcDestroyProgram == -1) return -1;
+
+            // tbd: check for some useful options
+
+            const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module_amp, binary, 0, NULL, NULL);
+
+            if (rc_cuModuleLoadDataEx == -1) return -1;
+
+            if (cache_disable == false)
+            {
+              const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+              if (rc_write == false) return -1;
+            }
+
+            hcfree (binary);
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, &device_param->opencl_program_amp);
 
             if (CL_rc == -1) return -1;
 
-            write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+            CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
 
-            hcfree (binary);
+            //if (CL_rc == -1) return -1;
+
+            size_t build_log_size = 0;
+
+            hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
+
+            //if (CL_rc == -1) return -1;
+
+            #if defined (DEBUG)
+            if ((build_log_size > 1) || (CL_rc == -1))
+            #else
+            if (CL_rc == -1)
+            #endif
+            {
+              char *build_log = (char *) hcmalloc (build_log_size + 1);
+
+              const int rc_clGetProgramBuildInfo = hc_clGetProgramBuildInfo (hashcat_ctx, device_param->opencl_program_amp, device_param->opencl_device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
+
+              if (rc_clGetProgramBuildInfo == -1) return -1;
+
+              puts (build_log);
+
+              hcfree (build_log);
+            }
+
+            if (CL_rc == -1)
+            {
+              device_param->skipped_warning = true;
+
+              event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed - proceeding without this device.", device_id + 1, source_file);
+
+              continue;
+            }
+
+            if (cache_disable == false)
+            {
+              size_t binary_size;
+
+              CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL);
+
+              if (CL_rc == -1) return -1;
+
+              char *binary = (char *) hcmalloc (binary_size);
+
+              CL_rc = hc_clGetProgramInfo (hashcat_ctx, device_param->opencl_program_amp, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL);
+
+              if (CL_rc == -1) return -1;
+
+              write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
+
+              hcfree (binary);
+            }
           }
         }
         else
@@ -6908,13 +7116,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (rc_read_kernel == false) return -1;
 
-          int CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_amp);
+          if (device_param->is_cuda == true)
+          {
+            const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, &device_param->cuda_module_amp, kernel_sources[0], 0, NULL, NULL);
 
-          if (CL_rc == -1) return -1;
+            if (rc_cuModuleLoadDataEx == -1) return -1;
+          }
 
-          CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, &device_param->opencl_program_amp);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
+
+            CL_rc = hc_clBuildProgram (hashcat_ctx, device_param->opencl_program_amp, 1, &device_param->opencl_device, build_options_buf, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
 
         hcfree (kernel_sources[0]);
@@ -6940,99 +7158,196 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * global buffers
      */
 
-    int CL_rc;
-
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->opencl_d_plain_bufs);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->opencl_d_digests_buf);    if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->opencl_d_digests_shown);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->opencl_d_salt_bufs);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->opencl_d_result);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra0_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra1_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra2_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra3_buf);     if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->opencl_d_st_digests_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->opencl_d_st_salts_buf);   if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d,     CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf,     CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs,       CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
-
-    /**
-     * special buffers
-     */
-
-    if (user_options->slow_candidates == true)
+    if (device_param->is_cuda == true)
     {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
-    }
-    else
-    {
-      if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_a,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_b,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_c,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s1_d,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_a,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_b,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_c,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bitmap_s2_d,    bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_plain_bufs,     size_plains);             if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_buf,    size_digests);            if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_digests_shown,  size_shown);              if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_salt_bufs,      size_salts);              if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_result,         size_results);            if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra0_buf,     size_extra_buffer / 4);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra1_buf,     size_extra_buffer / 4);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra2_buf,     size_extra_buffer / 4);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_extra3_buf,     size_extra_buffer / 4);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_digests_buf, size_st_digests);         if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf,   size_st_salts);           if (CU_rc == -1) return -1;
+
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf,     size_digests);            if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_salt_bufs,   hashes->salts_buf,       size_salts);              if (CU_rc == -1) return -1;
+
+      /**
+       * special buffers
+       */
+
+      if (user_options->slow_candidates == true)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->opencl_d_rules);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
-
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+        CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
       }
-      else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+      else
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs_c);        if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+        {
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules,   size_rules);   if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
+
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules); if (CU_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+        {
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs,          size_combs);      if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_combs_c,        size_combs);      if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf,   size_root_css);   if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css); if (CU_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs,            size_bfs);        if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c,          size_bfs);        if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c,           size_tm);         if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf,   size_root_css);   if (CU_rc == -1) return -1;
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css); if (CU_rc == -1) return -1;
+        }
       }
-      else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
-      {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs);            if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs_c);          if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->opencl_d_tm_c);           if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
-      }
-    }
-
-    if (size_esalts)
-    {
-      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
-
-      if (CL_rc == -1) return -1;
-    }
-
-    if (hashconfig->st_hash != NULL)
-    {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
 
       if (size_esalts)
       {
-        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf);
+        CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts);
+
+        if (CU_rc == -1) return -1;
+
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts);
+
+        if (CU_rc == -1) return -1;
+      }
+
+      if (hashconfig->st_hash != NULL)
+      {
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests); if (CU_rc == -1) return -1;
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts);   if (CU_rc == -1) return -1;
+
+        if (size_esalts)
+        {
+          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts);
+
+          if (CU_rc == -1) return -1;
+
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts);
+
+          if (CU_rc == -1) return -1;
+        }
+      }
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_b);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_c);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_d);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_a);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_b);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_c);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s2_d);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_plains,             NULL, &device_param->opencl_d_plain_bufs);     if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_digests,            NULL, &device_param->opencl_d_digests_buf);    if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_shown,              NULL, &device_param->opencl_d_digests_shown);  if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_salts,              NULL, &device_param->opencl_d_salt_bufs);      if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_results,            NULL, &device_param->opencl_d_result);         if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra0_buf);     if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra1_buf);     if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra2_buf);     if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_extra_buffer / 4,   NULL, &device_param->opencl_d_extra3_buf);     if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_digests,         NULL, &device_param->opencl_d_st_digests_buf); if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->opencl_d_st_salts_buf);   if (CL_rc == -1) return -1;
+
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL); if (CL_rc == -1) return -1;
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs,   CL_TRUE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL); if (CL_rc == -1) return -1;
+
+      /**
+       * special buffers
+       */
+
+      if (user_options->slow_candidates == true)
+      {
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
+      }
+      else
+      {
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+        {
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->opencl_d_rules);   if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c); if (CL_rc == -1) return -1;
+
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_TRUE, 0, size_rules, straight_ctx->kernel_rules_buf, 0, NULL, NULL); if (CL_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+        {
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs);          if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_combs,      NULL, &device_param->opencl_d_combs_c);        if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs);            if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_bfs,        NULL, &device_param->opencl_d_bfs_c);          if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_tm,         NULL, &device_param->opencl_d_tm_c);           if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_root_css,   NULL, &device_param->opencl_d_root_css_buf);   if (CL_rc == -1) return -1;
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_markov_css, NULL, &device_param->opencl_d_markov_css_buf); if (CL_rc == -1) return -1;
+        }
+      }
+
+      if (size_esalts)
+      {
+        CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs);
 
         if (CL_rc == -1) return -1;
 
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
       }
+
+      if (hashconfig->st_hash != NULL)
+      {
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_digests_buf,  CL_TRUE, 0, size_st_digests,         hashes->st_digests_buf,  0, NULL, NULL); if (CL_rc == -1) return -1;
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_salts_buf,    CL_TRUE, 0, size_st_salts,           hashes->st_salts_buf,    0, NULL, NULL); if (CL_rc == -1) return -1;
+
+        if (size_esalts)
+        {
+          CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
+      }
     }
 
     /**
@@ -8534,8 +8849,6 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
     /*
     if (device_param->is_opencl == true)
     {
-      int CL_rc;
-
       CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel1, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
       CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel2, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
       CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel3, 33, sizeof (cl_uint), device_param->kernel_params[33]); if (CL_rc == -1) return -1;
@@ -8561,11 +8874,9 @@ int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx)
       {
         if (device_param->is_opencl == true)
         {
-          int CL_rc;
+          const int rc_clSetKernelArg = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
 
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
-
-          if (CL_rc == -1) return -1;
+          if (rc_clSetKernelArg == -1) return -1;
         }
       }
     }

From 5ee033673c73fc950380b7d347e0191a865e88df Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 3 May 2019 15:50:07 +0200
Subject: [PATCH 24/73] Disable name mangling in NVRTC's PTX output and more

---
 OpenCL/inc_vendor.h |    2 +-
 include/backend.h   |    2 +
 include/types.h     |   21 +
 src/backend.c       | 2361 ++++++++++++++++++++++++++++++-------------
 4 files changed, 1665 insertions(+), 721 deletions(-)

diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index f8def9bd2..f69e0573b 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -23,7 +23,7 @@
 #define CONSTANT_AS
 #define GLOBAL_AS
 #define LOCAL_AS
-#define KERNEL_FQ   __global__
+#define KERNEL_FQ   extern "C" __global__
 #elif defined IS_OPENCL
 #define CONSTANT_AS __constant
 #define GLOBAL_AS   __global
diff --git a/include/backend.h b/include/backend.h
index ca3f8548e..d479cc5b3 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -56,6 +56,8 @@ int hc_cuMemcpyHtoD              (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDev
 int hc_cuMemFree                 (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
 int hc_cuModuleLoadDataEx        (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
 int hc_cuModuleUnload            (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
+int hc_cuModuleGetFunction       (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
+int hc_cuFuncGetAttribute        (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/types.h b/include/types.h
index 747b78369..861b27858 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1254,6 +1254,27 @@ typedef struct hc_device_param
   CUmodule          cuda_module_mp;
   CUmodule          cuda_module_amp;
 
+  CUfunction        cuda_function1;
+  CUfunction        cuda_function12;
+  CUfunction        cuda_function2;
+  CUfunction        cuda_function23;
+  CUfunction        cuda_function3;
+  CUfunction        cuda_function4;
+  CUfunction        cuda_function_init2;
+  CUfunction        cuda_function_loop2;
+  CUfunction        cuda_function_mp;
+  CUfunction        cuda_function_mp_l;
+  CUfunction        cuda_function_mp_r;
+  CUfunction        cuda_function_amp;
+  CUfunction        cuda_function_tm;
+  CUfunction        cuda_function_memset;
+  CUfunction        cuda_function_atinit;
+  CUfunction        cuda_function_decompress;
+  CUfunction        cuda_function_aux1;
+  CUfunction        cuda_function_aux2;
+  CUfunction        cuda_function_aux3;
+  CUfunction        cuda_function_aux4;
+
   CUdeviceptr       cuda_d_pws_buf;
   CUdeviceptr       cuda_d_pws_amp_buf;
   CUdeviceptr       cuda_d_pws_comp_buf;
diff --git a/src/backend.c b/src/backend.c
index 9e86c5b6a..bba291d9e 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1410,6 +1410,62 @@ int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const vo
   return 0;
 }
 
+int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name)
+{
+  // Calls cuModuleGetFunction() through the backend's CUDA function table to look up `name` in `hmod`.
+  // On failure the error is logged via event_log_error() and -1 is returned; 0 means success.
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuModuleGetFunction (hfunc, hmod, name);
+
+  if (rc == CUDA_SUCCESS) return 0;
+
+  // prefer the textual description from cuGetErrorString(), else log the raw code
+
+  const char *err_text = NULL;
+
+  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuModuleGetFunction(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuModuleGetFunction(): %s", err_text);
+  }
+
+  return -1;
+}
+
+int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc)
+{
+  // Calls cuFuncGetAttribute() through the backend's CUDA function table, storing attribute `attrib` of `hfunc` in `pi`.
+  // On failure the error is logged via event_log_error() and -1 is returned; 0 means success.
+
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuFuncGetAttribute (pi, attrib, hfunc);
+
+  if (rc == CUDA_SUCCESS) return 0;
+
+  // prefer the textual description from cuGetErrorString(), else log the raw code
+
+  const char *err_text = NULL;
+
+  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %s", err_text);
+  }
+
+  return -1;
+}
+
+
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -5117,6 +5173,16 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             // By default 8%
 
             device_param->spin_damp = (double) user_options->spin_damp / 100;
+
+            // recommend CUDA
+
+            if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL))
+            {
+              event_log_warning (hashcat_ctx, "* Device #%u: No CUDA Toolkit installation detected.", device_id + 1);
+              event_log_warning (hashcat_ctx, "             Please install CUDA Toolkit for best utilization of this device");
+              event_log_warning (hashcat_ctx, "             Falling back to OpenCL");
+              event_log_warning (hashcat_ctx, NULL);
+            }
           }
         }
 
@@ -5685,7 +5751,33 @@ void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx)
   }
 }
 
-static int get_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result)
+static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, u32 *result)
+{
+  // Reads CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK of `function`; *result is written only on success (return 0).
+
+  int threads_per_block_max;
+
+  if (hc_cuFuncGetAttribute (hashcat_ctx, &threads_per_block_max, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1;
+
+  *result = (u32) threads_per_block_max;
+
+  return 0;
+}
+
+static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result)
+{
+  // Reads CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES of `function`; *result is written only on success (return 0).
+
+  int shared_bytes;
+
+  if (hc_cuFuncGetAttribute (hashcat_ctx, &shared_bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1;
+
+  *result = (u64) shared_bytes;
+
+  return 0;
+}
+
+static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result)
 {
   int CL_rc;
 
@@ -5715,7 +5807,7 @@ static int get_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return 0;
 }
 
-static int get_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result)
+static int get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result)
 {
   int CL_rc;
 
@@ -5730,7 +5822,7 @@ static int get_kernel_preferred_wgs_multiple (hashcat_ctx_t *hashcat_ctx, hc_dev
   return 0;
 }
 
-static int get_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result)
+static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u64 *result)
 {
   int CL_rc;
 
@@ -5745,7 +5837,7 @@ static int get_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_device_para
   return 0;
 }
 
-static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
+static u32 get_opencl_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
 {
   const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
 
@@ -5912,7 +6004,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if ((unstable_warning == true) && (user_options->force == false))
       {
-        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known OpenCL/Driver issue (not a hashcat issue)", device_id + 1, hashconfig->hash_mode);
+        event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u - known CUDA/OpenCL Runtime/Driver issue (not a hashcat issue)", device_id + 1, hashconfig->hash_mode);
         event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
 
         device_param->skipped_warning = true;
@@ -7366,30 +7458,62 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     device_param->kernel_params_buf32[33] = 0; // combs_mode
     device_param->kernel_params_buf64[34] = 0; // gid_max
 
-    device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf;
-    device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c;
-    device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c;
-    device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c;
-    device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps;
-    device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks;
-    device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a;
-    device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b;
-    device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c;
-    device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d;
-    device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a;
-    device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b;
-    device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c;
-    device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d;
-    device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs;
-    device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
-    device_param->kernel_params[16] = &device_param->opencl_d_digests_shown;
-    device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
-    device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
-    device_param->kernel_params[19] = &device_param->opencl_d_result;
-    device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf;
-    device_param->kernel_params[21] = &device_param->opencl_d_extra1_buf;
-    device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf;
-    device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf;
+    if (device_param->is_cuda == true)
+    {
+      device_param->kernel_params[ 0] = NULL; // &device_param->cuda_d_pws_buf;
+      device_param->kernel_params[ 1] = &device_param->cuda_d_rules_c;
+      device_param->kernel_params[ 2] = &device_param->cuda_d_combs_c;
+      device_param->kernel_params[ 3] = &device_param->cuda_d_bfs_c;
+      device_param->kernel_params[ 4] = NULL; // &device_param->cuda_d_tmps;
+      device_param->kernel_params[ 5] = NULL; // &device_param->cuda_d_hooks;
+      device_param->kernel_params[ 6] = &device_param->cuda_d_bitmap_s1_a;
+      device_param->kernel_params[ 7] = &device_param->cuda_d_bitmap_s1_b;
+      device_param->kernel_params[ 8] = &device_param->cuda_d_bitmap_s1_c;
+      device_param->kernel_params[ 9] = &device_param->cuda_d_bitmap_s1_d;
+      device_param->kernel_params[10] = &device_param->cuda_d_bitmap_s2_a;
+      device_param->kernel_params[11] = &device_param->cuda_d_bitmap_s2_b;
+      device_param->kernel_params[12] = &device_param->cuda_d_bitmap_s2_c;
+      device_param->kernel_params[13] = &device_param->cuda_d_bitmap_s2_d;
+      device_param->kernel_params[14] = &device_param->cuda_d_plain_bufs;
+      device_param->kernel_params[15] = &device_param->cuda_d_digests_buf;
+      device_param->kernel_params[16] = &device_param->cuda_d_digests_shown;
+      device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs;
+      device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs;
+      device_param->kernel_params[19] = &device_param->cuda_d_result;
+      device_param->kernel_params[20] = &device_param->cuda_d_extra0_buf;
+      device_param->kernel_params[21] = &device_param->cuda_d_extra1_buf;
+      device_param->kernel_params[22] = &device_param->cuda_d_extra2_buf;
+      device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf;
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf;
+      device_param->kernel_params[ 1] = &device_param->opencl_d_rules_c;
+      device_param->kernel_params[ 2] = &device_param->opencl_d_combs_c;
+      device_param->kernel_params[ 3] = &device_param->opencl_d_bfs_c;
+      device_param->kernel_params[ 4] = NULL; // &device_param->opencl_d_tmps;
+      device_param->kernel_params[ 5] = NULL; // &device_param->opencl_d_hooks;
+      device_param->kernel_params[ 6] = &device_param->opencl_d_bitmap_s1_a;
+      device_param->kernel_params[ 7] = &device_param->opencl_d_bitmap_s1_b;
+      device_param->kernel_params[ 8] = &device_param->opencl_d_bitmap_s1_c;
+      device_param->kernel_params[ 9] = &device_param->opencl_d_bitmap_s1_d;
+      device_param->kernel_params[10] = &device_param->opencl_d_bitmap_s2_a;
+      device_param->kernel_params[11] = &device_param->opencl_d_bitmap_s2_b;
+      device_param->kernel_params[12] = &device_param->opencl_d_bitmap_s2_c;
+      device_param->kernel_params[13] = &device_param->opencl_d_bitmap_s2_d;
+      device_param->kernel_params[14] = &device_param->opencl_d_plain_bufs;
+      device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
+      device_param->kernel_params[16] = &device_param->opencl_d_digests_shown;
+      device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
+      device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
+      device_param->kernel_params[19] = &device_param->opencl_d_result;
+      device_param->kernel_params[20] = &device_param->opencl_d_extra0_buf;
+      device_param->kernel_params[21] = &device_param->opencl_d_extra1_buf;
+      device_param->kernel_params[22] = &device_param->opencl_d_extra2_buf;
+      device_param->kernel_params[23] = &device_param->opencl_d_extra3_buf;
+    }
+
     device_param->kernel_params[24] = &device_param->kernel_params_buf32[24];
     device_param->kernel_params[25] = &device_param->kernel_params_buf32[25];
     device_param->kernel_params[26] = &device_param->kernel_params_buf32[26];
@@ -7416,13 +7540,29 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
       {
-        device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
+        if (device_param->is_cuda == true)
+        {
+          device_param->kernel_params_mp[0] = &device_param->cuda_d_combs;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
+        }
       }
       else
       {
         if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
         {
-          device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
+          if (device_param->is_cuda == true)
+          {
+            device_param->kernel_params_mp[0] = &device_param->cuda_d_combs;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            device_param->kernel_params_mp[0] = &device_param->opencl_d_combs;
+          }
         }
         else
         {
@@ -7432,8 +7572,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         }
       }
 
-      device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf;
-      device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_mp[1] = &device_param->cuda_d_root_css_buf;
+        device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf;
+        device_param->kernel_params_mp[2] = &device_param->opencl_d_markov_css_buf;
+      }
+
       device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3];
       device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4];
       device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5];
@@ -7452,8 +7602,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
                                                   // ? &device_param->opencl_d_pws_buf
                                                   // : &device_param->opencl_d_pws_amp_buf;
-      device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf;
-      device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf;
+        device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf;
+        device_param->kernel_params_mp_l[2] = &device_param->opencl_d_markov_css_buf;
+      }
+
       device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3];
       device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4];
       device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5];
@@ -7469,9 +7629,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_mp_r_buf32[7] = 0;
       device_param->kernel_params_mp_r_buf64[8] = 0;
 
-      device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs;
-      device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf;
-      device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_mp_r[0] = &device_param->cuda_d_bfs;
+        device_param->kernel_params_mp_r[1] = &device_param->cuda_d_root_css_buf;
+        device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs;
+        device_param->kernel_params_mp_r[1] = &device_param->opencl_d_root_css_buf;
+        device_param->kernel_params_mp_r[2] = &device_param->opencl_d_markov_css_buf;
+      }
+
       device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3];
       device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4];
       device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5];
@@ -7482,11 +7653,24 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_amp_buf32[5] = 0; // combs_mode
       device_param->kernel_params_amp_buf64[6] = 0; // gid_max
 
-      device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf;
-      device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf;
-      device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c;
-      device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c;
-      device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_amp[0] = NULL; // &device_param->cuda_d_pws_buf;
+        device_param->kernel_params_amp[1] = NULL; // &device_param->cuda_d_pws_amp_buf;
+        device_param->kernel_params_amp[2] = &device_param->cuda_d_rules_c;
+        device_param->kernel_params_amp[3] = &device_param->cuda_d_combs_c;
+        device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf;
+        device_param->kernel_params_amp[1] = NULL; // &device_param->opencl_d_pws_amp_buf;
+        device_param->kernel_params_amp[2] = &device_param->opencl_d_rules_c;
+        device_param->kernel_params_amp[3] = &device_param->opencl_d_combs_c;
+        device_param->kernel_params_amp[4] = &device_param->opencl_d_bfs_c;
+      }
+
       device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5];
       device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6];
 
@@ -7508,192 +7692,1222 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     device_param->kernel_params_decompress_buf64[3] = 0; // gid_max
 
-    device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx;
-    device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf;
-    device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                                      // ? &device_param->opencl_d_pws_buf
-                                                      // : &device_param->opencl_d_pws_amp_buf;
+    if (device_param->is_cuda == true)
+    {
+      device_param->kernel_params_decompress[0] = NULL; // &device_param->cuda_d_pws_idx;
+      device_param->kernel_params_decompress[1] = NULL; // &device_param->cuda_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                        // ? &device_param->cuda_d_pws_buf
+                                                        // : &device_param->cuda_d_pws_amp_buf;
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx;
+      device_param->kernel_params_decompress[1] = NULL; // &device_param->opencl_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                        // ? &device_param->opencl_d_pws_buf
+                                                        // : &device_param->opencl_d_pws_amp_buf;
+    }
+
     device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3];
 
     /**
      * kernel name
      */
 
-    char kernel_name[64] = { 0 };
-
-    if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+    if (device_param->is_cuda == true)
     {
-      if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
+      char kernel_name[64] = { 0 };
+
+      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
       {
-        if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+        if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
         {
-          // kernel1
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
 
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4);
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
+            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          // kernel2
+            // kernel2
 
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8);
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
+            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          // kernel3
+            // kernel3
 
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16);
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
+            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type);
+
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
+
+            if (CL_rc == -1) return -1;
+          }
         }
         else
         {
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type);
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4);
 
-          if (CL_rc == -1) return -1;
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name);
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
+            if (CL_rc == -1) return -1;
 
-          if (CL_rc == -1) return -1;
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1);
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
+            if (CL_rc == -1) return -1;
 
-          if (CL_rc == -1) return -1;
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1);
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
+            if (CL_rc == -1) return -1;
 
-          if (CL_rc == -1) return -1;
+            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
+
+            if (CL_rc == -1) return -1;
+
+            // kernel2
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8);
+
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
+
+            if (CL_rc == -1) return -1;
+
+            // kernel3
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16);
+
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
+
+            if (CL_rc == -1) return -1;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type);
+
+            CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function4, device_param->cuda_module, kernel_name);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_wgs4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function4, &device_param->kernel_local_mem_size4);
+
+            if (CL_rc == -1) return -1;
+
+            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
+
+            if (CL_rc == -1) return -1;
+          }
+        }
+
+        if (user_options->slow_candidates == true)
+        {
+        }
+        else
+        {
+          if (user_options->attack_mode == ATTACK_MODE_BF)
+          {
+            if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
+            {
+              snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
+
+              CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_tm, device_param->cuda_module, kernel_name);
+
+              if (CL_rc == -1) return -1;
+
+              CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_wgs_tm);
+
+              if (CL_rc == -1) return -1;
+
+              CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_tm, &device_param->kernel_local_mem_size_tm);
+
+              if (CL_rc == -1) return -1;
+
+              device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size;
+
+              if (CL_rc == -1) return -1;
+            }
+          }
         }
       }
       else
       {
-        if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+        // kernel1
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type);
+
+        CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function1, device_param->cuda_module, kernel_name);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_wgs1);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function1, &device_param->kernel_local_mem_size1);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
+
+        if (CL_rc == -1) return -1;
+
+        // kernel2
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type);
+
+        CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2, device_param->cuda_module, kernel_name);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_wgs2);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2, &device_param->kernel_local_mem_size2);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
+
+        if (CL_rc == -1) return -1;
+
+        // kernel3
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type);
+
+        CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function3, device_param->cuda_module, kernel_name);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_wgs3);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function3, &device_param->kernel_local_mem_size3);
+
+        if (CL_rc == -1) return -1;
+
+        device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
+
+        if (CL_rc == -1) return -1;
+
+        // kernel12
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
         {
-          // kernel1
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type);
 
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4);
-
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function12, device_param->cuda_module, kernel_name);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_wgs12);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function12, &device_param->kernel_local_mem_size12);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
-
-          if (CL_rc == -1) return -1;
-
-          // kernel2
-
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8);
-
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
-
-          if (CL_rc == -1) return -1;
-
-          // kernel3
-
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16);
-
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
-
-          if (CL_rc == -1) return -1;
-
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
+          device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
-        else
+
+        // kernel23
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
         {
-          snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type);
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type);
 
-          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function23, device_param->cuda_module, kernel_name);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_wgs23);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function23, &device_param->kernel_local_mem_size23);
 
           if (CL_rc == -1) return -1;
 
-          CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
+          device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // init2
+
+        if (hashconfig->opts_type & OPTS_TYPE_INIT2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_init2, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_wgs_init2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_init2, &device_param->kernel_local_mem_size_init2);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // loop2
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_wgs_loop2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2, &device_param->kernel_local_mem_size_loop2);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux1
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX1)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux1, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_wgs_aux1);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux1, &device_param->kernel_local_mem_size_aux1);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux2
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux2, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_wgs_aux2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux2, &device_param->kernel_local_mem_size_aux2);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux3
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX3)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux3, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_wgs_aux3);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux3, &device_param->kernel_local_mem_size_aux3);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux4
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX4)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type);
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_aux4, device_param->cuda_module, kernel_name);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_wgs_aux4);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_aux4, &device_param->kernel_local_mem_size_aux4);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+      }
+
+      // GPU memset
+
+      CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module, "gpu_memset");
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset);
+
+      if (CL_rc == -1) return -1;
+
+      device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size;
+
+      if (CL_rc == -1) return -1;
+
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
+
+      // GPU autotune init
+
+      CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module, "gpu_atinit");
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size;
+
+      if (CL_rc == -1) return -1;
+
+      // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
+      // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
+
+      // GPU decompress
+
+      CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module, "gpu_decompress");
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size;
+
+      if (CL_rc == -1) return -1;
+
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
+
+      // MP start
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if (user_options->attack_mode == ATTACK_MODE_BF)
+        {
+          // mp_l
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_l, device_param->cuda_module_mp, "l_markov");
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_wgs_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_l, &device_param->kernel_local_mem_size_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+
+          // mp_r
+
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp_r, device_param->cuda_module_mp, "r_markov");
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_wgs_mp_r);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp_r, &device_param->kernel_local_mem_size_mp_r);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+
+          if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
+          }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+        {
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov");
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+        {
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov");
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_wgs_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_mp, &device_param->kernel_local_mem_size_mp);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
       }
 
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          CL_rc = hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_amp, device_param->cuda_module_amp, "amp");
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_wgs_amp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_amp, &device_param->kernel_local_mem_size_amp);
+
+          if (CL_rc == -1) return -1;
+
+          device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size;
+
+          if (CL_rc == -1) return -1;
+        }
+
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          for (u32 i = 0; i < 5; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+
+          for (u32 i = 5; i < 6; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+
+          for (u32 i = 6; i < 7; i++)
+          {
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
+
+            //if (CL_rc == -1) return -1;
+          }
+        }
+      }
+
+// zero some data buffers
+
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      char kernel_name[64] = { 0 };
+
+      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
+        {
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
+
+            if (CL_rc == -1) return -1;
+
+            // kernel2
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
+
+            if (CL_rc == -1) return -1;
+
+            // kernel3
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
+
+            if (CL_rc == -1) return -1;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
+
+            if (CL_rc == -1) return -1;
+          }
+        }
+        else
+        {
+          if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+          {
+            // kernel1
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
+
+            if (CL_rc == -1) return -1;
+
+            // kernel2
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
+
+            if (CL_rc == -1) return -1;
+
+            // kernel3
+
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
+
+            if (CL_rc == -1) return -1;
+          }
+          else
+          {
+            snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type);
+
+            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_wgs4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_local_mem_size4);
+
+            if (CL_rc == -1) return -1;
+
+            CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel4, &device_param->kernel_preferred_wgs_multiple4);
+
+            if (CL_rc == -1) return -1;
+          }
+        }
+
+        if (user_options->slow_candidates == true)
+        {
+        }
+        else
+        {
+          if (user_options->attack_mode == ATTACK_MODE_BF)
+          {
+            if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
+            {
+              snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
+
+              CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_tm);
+
+              if (CL_rc == -1) return -1;
+
+              CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_wgs_tm);
+
+              if (CL_rc == -1) return -1;
+
+              CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_local_mem_size_tm);
+
+              if (CL_rc == -1) return -1;
+
+              CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_preferred_wgs_multiple_tm);
+
+              if (CL_rc == -1) return -1;
+            }
+          }
+        }
+      }
+      else
+      {
+        // kernel1
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type);
+
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
+
+        if (CL_rc == -1) return -1;
+
+        // kernel2
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type);
+
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
+
+        if (CL_rc == -1) return -1;
+
+        // kernel3
+
+        snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type);
+
+        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
+
+        if (CL_rc == -1) return -1;
+
+        CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
+
+        if (CL_rc == -1) return -1;
+
+        // kernel12
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel12);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_wgs12);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_local_mem_size12);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_preferred_wgs_multiple12);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // kernel23
+
+        if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel23);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_wgs23);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_local_mem_size23);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_preferred_wgs_multiple23);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // init2
+
+        if (hashconfig->opts_type & OPTS_TYPE_INIT2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_init2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_wgs_init2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_local_mem_size_init2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // loop2
+
+        if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_loop2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_wgs_loop2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_local_mem_size_loop2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_preferred_wgs_multiple_loop2);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux1
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX1)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux1);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_wgs_aux1);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_local_mem_size_aux1);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_preferred_wgs_multiple_aux1);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux2
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX2)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_wgs_aux2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_local_mem_size_aux2);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_preferred_wgs_multiple_aux2);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux3
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX3)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux3);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_wgs_aux3);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_local_mem_size_aux3);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_preferred_wgs_multiple_aux3);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        // aux4
+
+        if (hashconfig->opts_type & OPTS_TYPE_AUX4)
+        {
+          snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type);
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux4);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_wgs_aux4);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_local_mem_size_aux4);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_preferred_wgs_multiple_aux4);
+
+          if (CL_rc == -1) return -1;
+        }
+      }
+
+      // GPU memset
+
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_memset", &device_param->opencl_kernel_memset);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
+
+      // GPU autotune init
+
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_atinit", &device_param->opencl_kernel_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
+
+      // GPU decompress
+
+      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_decompress", &device_param->opencl_kernel_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress);
+
+      if (CL_rc == -1) return -1;
+
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
+
+      // MP start
+
       if (user_options->slow_candidates == true)
       {
       }
@@ -7701,575 +8915,231 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (user_options->attack_mode == ATTACK_MODE_BF)
         {
+          // mp_l
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "l_markov", &device_param->opencl_kernel_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_wgs_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_local_mem_size_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l);
+
+          if (CL_rc == -1) return -1;
+
+          // mp_r
+
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "r_markov", &device_param->opencl_kernel_mp_r);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_wgs_mp_r);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_local_mem_size_mp_r);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r);
+
+          if (CL_rc == -1) return -1;
+
           if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
           {
-            snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
+          }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+        {
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
 
-            CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_tm);
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
+
+          if (CL_rc == -1) return -1;
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+        {
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
+
+          if (CL_rc == -1) return -1;
+        }
+      }
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_amp, "amp", &device_param->opencl_kernel_amp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_wgs_amp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_local_mem_size_amp);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_preferred_wgs_multiple_amp);
+
+          if (CL_rc == -1) return -1;
+        }
+
+        if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+        {
+          // nothing to do
+        }
+        else
+        {
+          for (u32 i = 0; i < 5; i++)
+          {
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
 
             if (CL_rc == -1) return -1;
+          }
 
-            CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_wgs_tm);
+          for (u32 i = 5; i < 6; i++)
+          {
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
 
             if (CL_rc == -1) return -1;
+          }
 
-            CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_local_mem_size_tm);
-
-            if (CL_rc == -1) return -1;
-
-            CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_tm, &device_param->kernel_preferred_wgs_multiple_tm);
+          for (u32 i = 6; i < 7; i++)
+          {
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
 
             if (CL_rc == -1) return -1;
           }
         }
       }
-    }
-    else
-    {
-      // kernel1
 
-      snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type);
+      // zero some data buffers
 
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel1);
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
 
-      if (CL_rc == -1) return -1;
+      /**
+       * special buffers
+       */
 
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_wgs1);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_local_mem_size1);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel1, &device_param->kernel_preferred_wgs_multiple1);
-
-      if (CL_rc == -1) return -1;
-
-      // kernel2
-
-      snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type);
-
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_wgs2);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_local_mem_size2);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2, &device_param->kernel_preferred_wgs_multiple2);
-
-      if (CL_rc == -1) return -1;
-
-      // kernel3
-
-      snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type);
-
-      CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel3);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_wgs3);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_local_mem_size3);
-
-      if (CL_rc == -1) return -1;
-
-      CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel3, &device_param->kernel_preferred_wgs_multiple3);
-
-      if (CL_rc == -1) return -1;
-
-      // kernel12
-
-      if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel12);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_wgs12);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_local_mem_size12);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel12, &device_param->kernel_preferred_wgs_multiple12);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // kernel23
-
-      if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel23);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_wgs23);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_local_mem_size23);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel23, &device_param->kernel_preferred_wgs_multiple23);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // init2
-
-      if (hashconfig->opts_type & OPTS_TYPE_INIT2)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_init2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_wgs_init2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_local_mem_size_init2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // loop2
-
-      if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_loop2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_wgs_loop2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_local_mem_size_loop2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_loop2, &device_param->kernel_preferred_wgs_multiple_loop2);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // aux1
-
-      if (hashconfig->opts_type & OPTS_TYPE_AUX1)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux1);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_wgs_aux1);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_local_mem_size_aux1);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux1, &device_param->kernel_preferred_wgs_multiple_aux1);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // aux2
-
-      if (hashconfig->opts_type & OPTS_TYPE_AUX2)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_wgs_aux2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_local_mem_size_aux2);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux2, &device_param->kernel_preferred_wgs_multiple_aux2);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // aux3
-
-      if (hashconfig->opts_type & OPTS_TYPE_AUX3)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux3);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_wgs_aux3);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_local_mem_size_aux3);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux3, &device_param->kernel_preferred_wgs_multiple_aux3);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      // aux4
-
-      if (hashconfig->opts_type & OPTS_TYPE_AUX4)
-      {
-        snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type);
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_aux4);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_wgs_aux4);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_local_mem_size_aux4);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_aux4, &device_param->kernel_preferred_wgs_multiple_aux4);
-
-        if (CL_rc == -1) return -1;
-      }
-    }
-
-    // GPU memset
-
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_memset", &device_param->opencl_kernel_memset);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem),   device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
-
-    // GPU autotune init
-
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_atinit", &device_param->opencl_kernel_atinit);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_wgs_atinit);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_local_mem_size_atinit);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_atinit, &device_param->kernel_preferred_wgs_multiple_atinit);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
-
-    // GPU decompress
-
-    CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, "gpu_decompress", &device_param->opencl_kernel_decompress);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_wgs_decompress);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_local_mem_size_decompress);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_decompress, &device_param->kernel_preferred_wgs_multiple_decompress);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem),   device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem),   device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem),   device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
-
-    // MP start
-
-    if (user_options->slow_candidates == true)
-    {
-    }
-    else
-    {
-      if (user_options->attack_mode == ATTACK_MODE_BF)
-      {
-        // mp_l
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "l_markov", &device_param->opencl_kernel_mp_l);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_wgs_mp_l);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_local_mem_size_mp_l);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_l, &device_param->kernel_preferred_wgs_multiple_mp_l);
-
-        if (CL_rc == -1) return -1;
-
-        // mp_r
-
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "r_markov", &device_param->opencl_kernel_mp_r);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_wgs_mp_r);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_local_mem_size_mp_r);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r);
-
-        if (CL_rc == -1) return -1;
-
-        if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
-        {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
-        }
-      }
-      else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
-      {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
-
-        if (CL_rc == -1) return -1;
-      }
-      else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
-      {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_wgs_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_local_mem_size_mp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp, &device_param->kernel_preferred_wgs_multiple_mp);
-
-        if (CL_rc == -1) return -1;
-      }
-    }
-
-    if (user_options->slow_candidates == true)
-    {
-    }
-    else
-    {
-      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-      {
-        // nothing to do
-      }
-      else
-      {
-        CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_amp, "amp", &device_param->opencl_kernel_amp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_wgs_amp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_local_mem_size_amp);
-
-        if (CL_rc == -1) return -1;
-
-        CL_rc = get_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_amp, &device_param->kernel_preferred_wgs_multiple_amp);
-
-        if (CL_rc == -1) return -1;
-      }
-
-      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-      {
-        // nothing to do
-      }
-      else
-      {
-        for (u32 i = 0; i < 5; i++)
-        {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]);
-
-          if (CL_rc == -1) return -1;
-        }
-
-        for (u32 i = 5; i < 6; i++)
-        {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]);
-
-          if (CL_rc == -1) return -1;
-        }
-
-        for (u32 i = 6; i < 7; i++)
-        {
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
-
-          if (CL_rc == -1) return -1;
-        }
-      }
-    }
-
-    // zero some data buffers
-
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
-
-    /**
-     * special buffers
-     */
-
-    if (user_options->slow_candidates == true)
-    {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
-    }
-    else
-    {
-      if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+      if (user_options->slow_candidates == true)
       {
         CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
       }
-      else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+      else
       {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs,          size_combs);       if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
-      }
-      else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
-      {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
-      }
-    }
-
-    if (user_options->slow_candidates == true)
-    {
-    }
-    else
-    {
-      if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2))
-      {
-        /**
-         * prepare mp
-         */
-
-        if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
-          device_param->kernel_params_mp_buf32[5] = 0;
-          device_param->kernel_params_mp_buf32[6] = 0;
-          device_param->kernel_params_mp_buf32[7] = 0;
-
-          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_buf32[5] = full01;
-          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_buf32[5] = full06;
-          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_buf32[5] = full80;
-          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
-          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
         }
-        else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
-          device_param->kernel_params_mp_buf32[5] = 0;
-          device_param->kernel_params_mp_buf32[6] = 0;
-          device_param->kernel_params_mp_buf32[7] = 0;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs,          size_combs);       if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
         }
-
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
       }
-      else if (user_options->attack_mode == ATTACK_MODE_BF)
+
+      if (user_options->slow_candidates == true)
       {
-        /**
-         * prepare mp_r and mp_l
-         */
+      }
+      else
+      {
+        if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2))
+        {
+          /**
+           * prepare mp
+           */
 
-        device_param->kernel_params_mp_l_buf32[6] = 0;
-        device_param->kernel_params_mp_l_buf32[7] = 0;
-        device_param->kernel_params_mp_l_buf32[8] = 0;
+          if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
 
-        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_l_buf32[6] = full01;
-        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_l_buf32[6] = full06;
-        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_l_buf32[6] = full80;
-        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
-        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_buf32[5] = full01;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_buf32[5] = full06;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_buf32[5] = full80;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
+          }
+          else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
+          }
 
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
-        for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+          for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_BF)
+        {
+          /**
+           * prepare mp_r and mp_l
+           */
+
+          device_param->kernel_params_mp_l_buf32[6] = 0;
+          device_param->kernel_params_mp_l_buf32[7] = 0;
+          device_param->kernel_params_mp_l_buf32[8] = 0;
+
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_l_buf32[6] = full01;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_l_buf32[6] = full06;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_l_buf32[6] = full80;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
+
+          for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+          for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+        }
       }
     }
 
@@ -8277,7 +9147,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * now everything that depends on threads and accel, basically dynamic workload
      */
 
-    u32 kernel_threads = get_kernel_threads (hashcat_ctx, device_param);
+    u32 kernel_threads = 0;
+
+    if (device_param->is_cuda == true)
+    {
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      kernel_threads = get_opencl_kernel_threads (hashcat_ctx, device_param);
+    }
 
     // this is required because inside the kernels there is this:
     // __local pw_t s_pws[64];
@@ -8472,19 +9351,26 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     device_param->size_brain_link_out = size_brain_link_out;
     #endif
 
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->opencl_d_pws_buf);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->opencl_d_pws_amp_buf);  if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->opencl_d_pws_comp_buf); if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->opencl_d_pws_idx);      if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->opencl_d_tmps);         if (CL_rc == -1) return -1;
-    CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->opencl_d_hooks);        if (CL_rc == -1) return -1;
+    if (device_param->is_cuda == true)
+    {
+    }
 
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+    if (device_param->is_opencl == true)
+    {
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws,      NULL, &device_param->opencl_d_pws_buf);      if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_pws_amp,  NULL, &device_param->opencl_d_pws_amp_buf);  if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_comp, NULL, &device_param->opencl_d_pws_comp_buf); if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_pws_idx,  NULL, &device_param->opencl_d_pws_idx);      if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->opencl_d_tmps);         if (CL_rc == -1) return -1;
+      CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->opencl_d_hooks);        if (CL_rc == -1) return -1;
+
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+    }
 
     /**
      * main host data
@@ -8533,9 +9419,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * kernel args
      */
 
-    device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf;
-    device_param->kernel_params[ 4] = &device_param->opencl_d_tmps;
-    device_param->kernel_params[ 5] = &device_param->opencl_d_hooks;
+    if (device_param->is_cuda == true)
+    {
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf;
+      device_param->kernel_params[ 4] = &device_param->opencl_d_tmps;
+      device_param->kernel_params[ 5] = &device_param->opencl_d_hooks;
+    }
 
     if (user_options->slow_candidates == true)
     {
@@ -8550,21 +9443,35 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
         {
-          device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                            ? &device_param->opencl_d_pws_buf
-                                            : &device_param->opencl_d_pws_amp_buf;
+          if (device_param->is_cuda == true)
+          {
+          }
 
-          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
+          if (device_param->is_opencl == true)
+          {
+            device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->opencl_d_pws_buf
+                                              : &device_param->opencl_d_pws_amp_buf;
+
+            CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
+          }
         }
       }
 
       if (user_options->attack_mode == ATTACK_MODE_BF)
       {
-        device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                            ? &device_param->opencl_d_pws_buf
-                                            : &device_param->opencl_d_pws_amp_buf;
+        if (device_param->is_cuda == true)
+        {
+        }
 
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
+        if (device_param->is_opencl == true)
+        {
+          device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->opencl_d_pws_buf
+                                              : &device_param->opencl_d_pws_amp_buf;
+
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
+        }
       }
 
       if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -8573,23 +9480,37 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
       else
       {
-        device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf;
-        device_param->kernel_params_amp[1] = &device_param->opencl_d_pws_amp_buf;
+        if (device_param->is_cuda == true)
+        {
+        }
 
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
-        CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
+        if (device_param->is_opencl == true)
+        {
+          device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf;
+          device_param->kernel_params_amp[1] = &device_param->opencl_d_pws_amp_buf;
+
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
+          CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
+        }
       }
     }
 
-    device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx;
-    device_param->kernel_params_decompress[1] = &device_param->opencl_d_pws_comp_buf;
-    device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-                                              ? &device_param->opencl_d_pws_buf
-                                              : &device_param->opencl_d_pws_amp_buf;
+    if (device_param->is_cuda == true)
+    {
+    }
 
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+    if (device_param->is_opencl == true)
+    {
+      device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx;
+      device_param->kernel_params_decompress[1] = &device_param->opencl_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                ? &device_param->opencl_d_pws_buf
+                                                : &device_param->opencl_d_pws_amp_buf;
+
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
+    }
 
     hardware_power_all += device_param->hardware_power;
 

From f2948460c9d3dbf9bb2fa0ffa07146ab28cc08a9 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 4 May 2019 10:13:43 +0200
Subject: [PATCH 25/73] Some first kernel invocations

---
 include/backend.h |  30 ++--
 src/autotune.c    |  10 +-
 src/backend.c     | 436 ++++++++++++++++++++++++++++++++++++++--------
 src/selftest.c    |  20 +--
 4 files changed, 395 insertions(+), 101 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index d479cc5b3..fede1f92b 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -58,6 +58,11 @@ int hc_cuModuleLoadDataEx        (hashcat_ctx_t *hashcat_ctx, CUmodule *module,
 int hc_cuModuleUnload            (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
 int hc_cuModuleGetFunction       (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
 int hc_cuFuncGetAttribute        (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
+int hc_cuStreamCreate            (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags);
+int hc_cuStreamDestroy           (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
+int hc_cuStreamSynchronize       (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
+int hc_cuLaunchKernel            (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
+int hc_cuCtxSynchronize          (hashcat_ctx_t *hashcat_ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
@@ -98,16 +103,21 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
 void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 pws_cnt, const u8 chr);
 
-int run_kernel            (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num, const u32 event_update, const u32 iteration);
-int run_kernel_mp         (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
-int run_kernel_tm         (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
-int run_kernel_amp        (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_kernel_atinit     (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
-int run_kernel_memset     (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
-int run_kernel_bzero      (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
-int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_copy              (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
-int run_cracker           (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
+int run_cuda_kernel_atinit    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
+int run_cuda_kernel_memset    (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
+int run_cuda_kernel_bzero     (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
+
+int run_opencl_kernel_atinit  (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
+int run_opencl_kernel_memset  (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
+int run_opencl_kernel_bzero   (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
+
+int run_kernel                (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num, const u32 event_update, const u32 iteration);
+int run_kernel_mp             (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
+int run_kernel_tm             (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
+int run_kernel_amp            (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_kernel_decompress     (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_copy                  (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
+int run_cracker               (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
 
 void generate_source_kernel_filename     (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file);
 void generate_cached_kernel_filename     (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *profile_dir, const char *device_name_chksum, char *cached_file);
diff --git a/src/autotune.c b/src/autotune.c
index a679ccbec..80d438bd7 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -105,7 +105,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   int CL_rc;
 
-  CL_rc = run_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
+  CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
 
   if (CL_rc == -1) return -1;
 
@@ -230,21 +230,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // reset them fake words
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws);
+  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws);
 
   if (CL_rc == -1) return -1;
 
   // reset other buffers in case autotune cracked something
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains);
+  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown);
+  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown);
 
   if (CL_rc == -1) return -1;
 
-  CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results);
+  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results);
 
   if (CL_rc == -1) return -1;
 
diff --git a/src/backend.c b/src/backend.c
index bba291d9e..47c90887c 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1464,7 +1464,140 @@ int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attri
   return 0;
 }
 
+int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags) // wrapper around cuStreamCreate: returns 0 on success, -1 after logging the CUDA error
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
+  CUDA_PTR *cuda = backend_ctx->cuda; // CUDA driver entry points are reached through this struct's function pointers
+
+  const CUresult CU_err = cuda->cuStreamCreate (phStream, Flags); // arguments are forwarded unchanged
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the driver's text for this error code
+    {
+      event_log_error (hashcat_ctx, "cuStreamCreate(): %s", pStr);
+    }
+    else // no text could be obtained: log the numeric CUresult instead
+    {
+      event_log_error (hashcat_ctx, "cuStreamCreate(): %d", CU_err);
+    }
+
+    return -1; // the failure has already been reported through event_log_error
+  }
+
+  return 0;
+}
+
+int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream) // wrapper around cuStreamDestroy: returns 0 on success, -1 after logging the CUDA error
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda; // CUDA driver entry points are reached through this struct's function pointers
+
+  const CUresult CU_err = cuda->cuStreamDestroy (hStream); // argument is forwarded unchanged
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the driver's text for this error code
+    {
+      event_log_error (hashcat_ctx, "cuStreamDestroy(): %s", pStr);
+    }
+    else // no text could be obtained: log the numeric CUresult instead
+    {
+      event_log_error (hashcat_ctx, "cuStreamDestroy(): %d", CU_err);
+    }
+
+    return -1; // the failure has already been reported through event_log_error
+  }
+
+  return 0;
+}
+
+int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream) // wrapper around cuStreamSynchronize: returns 0 on success, -1 after logging the CUDA error
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda; // CUDA driver entry points are reached through this struct's function pointers
+
+  const CUresult CU_err = cuda->cuStreamSynchronize (hStream); // argument is forwarded unchanged
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the driver's text for this error code
+    {
+      event_log_error (hashcat_ctx, "cuStreamSynchronize(): %s", pStr);
+    }
+    else // no text could be obtained: log the numeric CUresult instead
+    {
+      event_log_error (hashcat_ctx, "cuStreamSynchronize(): %d", CU_err);
+    }
+
+    return -1; // the failure has already been reported through event_log_error
+  }
+
+  return 0;
+}
+
+int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra) // wrapper around cuLaunchKernel: returns 0 on success, -1 after logging the CUDA error
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda; // CUDA driver entry points are reached through this struct's function pointers
+
+  const CUresult CU_err = cuda->cuLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); // all launch arguments are forwarded unchanged, in the same order
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the driver's text for this error code
+    {
+      event_log_error (hashcat_ctx, "cuLaunchKernel(): %s", pStr);
+    }
+    else // no text could be obtained: log the numeric CUresult instead
+    {
+      event_log_error (hashcat_ctx, "cuLaunchKernel(): %d", CU_err);
+    }
+
+    return -1; // the failure has already been reported through event_log_error
+  }
+
+  return 0; // NOTE(review): only the result of the launch call itself is checked here
+}
+
+int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx) // wrapper around cuCtxSynchronize: returns 0 on success, -1 after logging the CUDA error
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda; // CUDA driver entry points are reached through this struct's function pointers
+
+  const CUresult CU_err = cuda->cuCtxSynchronize (); // takes no arguments; no context handle is passed from here
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the driver's text for this error code
+    {
+      event_log_error (hashcat_ctx, "cuCtxSynchronize(): %s", pStr);
+    }
+    else // no text could be obtained: log the numeric CUresult instead
+    {
+      event_log_error (hashcat_ctx, "cuCtxSynchronize(): %d", CU_err);
+    }
+
+    return -1; // the failure has already been reported through event_log_error
+  }
+
+  return 0;
+}
 
 // OpenCL
 
@@ -2198,7 +2331,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         {
           const u32 size_tm = 32 * sizeof (bs_word_t);
 
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm);
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm);
 
           if (CL_rc == -1) return -1;
 
@@ -2881,7 +3014,43 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   return 0;
 }
 
-int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
+int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num) // enqueues the OpenCL decompress kernel for num elements, then flushes and finishes the queue; returns 0 on success, -1 on the first failing call
+{
+  u64 num_elements = num;
+
+  device_param->kernel_params_decompress_buf64[3] = num_elements; // stored before padding, so this slot holds the caller's count
+
+  const u64 kernel_threads = device_param->kernel_wgs_decompress;
+
+  num_elements = round_up_multiple_64 (num_elements, kernel_threads); // presumably pads to a multiple of kernel_threads (helper defined elsewhere -- TODO confirm)
+
+  cl_kernel kernel = device_param->opencl_kernel_decompress;
+
+  const size_t global_work_size[3] = { num_elements,    1, 1 }; // only the first entry is used: the kernel is enqueued with work_dim 1
+  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+
+  int CL_rc;
+
+  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); // presumably kernel_params_decompress[3] points at the _buf64[3] value written above -- TODO confirm
+
+  if (CL_rc == -1) return -1;
+
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+  if (CL_rc == -1) return -1;
+
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+  if (CL_rc == -1) return -1;
+
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); // presumably blocks until the queued kernel has completed (wrapper defined elsewhere)
+
+  if (CL_rc == -1) return -1;
+
+  return 0;
+}
+
+int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
 {
   u64 num_elements = num;
 
@@ -2921,7 +3090,66 @@ int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   return 0;
 }
 
-int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
+int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size) // sets size bytes of device memory at buf from the u32 pattern value; returns 0 on success, -1 on failure
+{
+  const u64 num16d = size / 16; // whole 16-byte chunks, handed to the memset kernel
+  const u64 num16m = size % 16; // trailing bytes (0..15), written by a host-to-device copy
+
+  if (num16d)
+  {
+    device_param->kernel_params_memset[0]       = (void *) &buf; // NOTE(review): address of the by-value parameter buf; the stored pointer is stale once this function returns
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf64[2] = num16d;
+
+    const u64 kernel_threads = device_param->kernel_wgs_memset;
+
+    u64 num_elements = num16d;
+
+    num_elements = CEILDIV (num_elements, kernel_threads); // from here on num_elements is used as the block count (CEILDIV is defined elsewhere; presumably a ceiling division)
+
+    CUfunction function = device_param->cuda_function_memset;
+
+    // No per-argument setup call is made here: hc_cuLaunchKernel() is given the whole
+    // kernel_params_memset pointer array. Presumably slots 1 and 2 of that array point at
+    // the _buf32[1] and _buf64[2] values stored above (set up outside this view -- TODO confirm).
+
+    // Launch layout: num_elements x 1 x 1 blocks of kernel_threads x 1 x 1 threads,
+    // 0 bytes of dynamic shared memory, NULL stream, NULL "extra" pointer.
+
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL);
+
+    if (rc_cuLaunchKernel == -1) return -1;
+
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx); // wait for the launch above before returning or touching the tail
+
+    if (rc_cuCtxSynchronize == -1) return -1;
+  }
+
+  if (num16m)
+  {
+    u32 tmp[4]; // 16 bytes of pattern; only the first num16m bytes are passed to the copy below
+
+    tmp[0] = value;
+    tmp[1] = value;
+    tmp[2] = value;
+    tmp[3] = value;
+
+    // Apparently we are allowed to add a byte offset to a CUdeviceptr: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/
+
+    const int rc_cuMemcpyHtoD = hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m); // destination starts right after the num16d chunks covered above
+
+    if (rc_cuMemcpyHtoD == -1) return -1;
+  }
+
+  return 0;
+}
+
+int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size) // zero-fill convenience wrapper; return value is that of run_cuda_kernel_memset
+{
+  return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size); // same call with the pattern value fixed to 0
+}
+
+int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
 {
   const u64 num16d = size / 16;
   const u64 num16m = size % 16;
@@ -2980,45 +3208,9 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   return 0;
 }
 
-int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
+int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)
 {
-  u64 num_elements = num;
-
-  device_param->kernel_params_decompress_buf64[3] = num_elements;
-
-  const u64 kernel_threads = device_param->kernel_wgs_decompress;
-
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-
-  cl_kernel kernel = device_param->opencl_kernel_decompress;
-
-  const size_t global_work_size[3] = { num_elements,    1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
-
-  int CL_rc;
-
-  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
-  return 0;
-}
-
-int run_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)
-{
-  return run_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
+  return run_opencl_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
 }
 
 int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt)
@@ -5178,9 +5370,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
             if ((backend_ctx->cuda == NULL) || (backend_ctx->nvrtc == NULL))
             {
-              event_log_warning (hashcat_ctx, "* Device #%u: No CUDA Toolkit installation detected.", device_id + 1);
-              event_log_warning (hashcat_ctx, "             Please install CUDA Toolkit for best utilization of this device");
-              event_log_warning (hashcat_ctx, "             Falling back to OpenCL");
+              event_log_warning (hashcat_ctx, "* Device #%u: CUDA SDK Toolkit installation NOT detected.", device_id + 1);
+              event_log_warning (hashcat_ctx, "             CUDA SDK Toolkit installation required for proper device support and utilization");
+              event_log_warning (hashcat_ctx, "             Falling back to OpenCL Runtime");
+
               event_log_warning (hashcat_ctx, NULL);
             }
           }
@@ -5837,7 +6030,7 @@ static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_devi
   return 0;
 }
 
-static u32 get_opencl_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
+static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
 {
   const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
 
@@ -8382,8 +8575,95 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         }
       }
 
-// zero some data buffers
+      // zero some data buffers
 
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs,    device_param->size_plains);   if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown);    if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result,        device_param->size_results);  if (CU_rc == -1) return -1;
+
+      /**
+       * special buffers
+       */
+
+      if (user_options->slow_candidates == true)
+      {
+        CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
+      }
+      else
+      {
+        if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+        {
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
+        {
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs,          size_combs);       if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c,        size_combs);       if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf,   size_root_css);    if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css);  if (CU_rc == -1) return -1;
+        }
+        else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
+        {
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs,            size_bfs);         if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c,          size_bfs);         if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c,           size_tm);          if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_root_css_buf,   size_root_css);    if (CU_rc == -1) return -1;
+          CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_markov_css_buf, size_markov_css);  if (CU_rc == -1) return -1;
+        }
+      }
+
+      if (user_options->slow_candidates == true)
+      {
+      }
+      else
+      {
+        if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2))
+        {
+          /**
+           * prepare mp
+           */
+
+          if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
+
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_buf32[5] = full01;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_buf32[5] = full06;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_buf32[5] = full80;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
+            if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
+          }
+          else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
+          {
+            device_param->kernel_params_mp_buf32[5] = 0;
+            device_param->kernel_params_mp_buf32[6] = 0;
+            device_param->kernel_params_mp_buf32[7] = 0;
+          }
+
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+        }
+        else if (user_options->attack_mode == ATTACK_MODE_BF)
+        {
+          /**
+           * prepare mp_r and mp_l
+           */
+
+          device_param->kernel_params_mp_l_buf32[6] = 0;
+          device_param->kernel_params_mp_l_buf32[7] = 0;
+          device_param->kernel_params_mp_l_buf32[8] = 0;
+
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)     device_param->kernel_params_mp_l_buf32[6] = full01;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)     device_param->kernel_params_mp_l_buf32[6] = full06;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)     device_param->kernel_params_mp_l_buf32[6] = full80;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
+          if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
+
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+          //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+        }
+      }
     }
 
     if (device_param->is_opencl == true)
@@ -9054,9 +9334,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       // zero some data buffers
 
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
 
       /**
        * special buffers
@@ -9064,28 +9344,28 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (user_options->slow_candidates == true)
       {
-        CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
+        CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
       }
       else
       {
         if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, size_rules_c); if (CL_rc == -1) return -1;
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs,          size_combs);       if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs,          size_combs);       if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c,        size_combs);       if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
         {
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
-          CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs,            size_bfs);         if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c,          size_bfs);         if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c,           size_tm);          if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_root_css_buf,   size_root_css);    if (CL_rc == -1) return -1;
+          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_markov_css_buf, size_markov_css);  if (CL_rc == -1) return -1;
         }
       }
 
@@ -9147,16 +9427,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * now everything that depends on threads and accel, basically dynamic workload
      */
 
-    u32 kernel_threads = 0;
-
-    if (device_param->is_cuda == true)
-    {
-    }
-
-    if (device_param->is_opencl == true)
-    {
-      kernel_threads = get_opencl_kernel_threads (hashcat_ctx, device_param);
-    }
+    u32 kernel_threads = get_kernel_threads (hashcat_ctx, device_param);
 
     // this is required because inside the kernels there is this:
     // __local pw_t s_pws[64];
@@ -9353,6 +9624,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_pws_buf,      size_pws);      if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_pws_amp_buf,  size_pws_amp);  if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_pws_comp_buf, size_pws_comp); if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_pws_idx,      size_pws_idx);  if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tmps,         size_tmps);     if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_hooks,        size_hooks);    if (CU_rc == -1) return -1;
+
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_buf,       device_param->size_pws);      if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_amp_buf,   device_param->size_pws_amp);  if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_comp_buf,  device_param->size_pws_comp); if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_idx,       device_param->size_pws_idx);  if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps,          device_param->size_tmps);     if (CU_rc == -1) return -1;
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks,         device_param->size_hooks);    if (CU_rc == -1) return -1;
     }
 
     if (device_param->is_opencl == true)
@@ -9364,12 +9648,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_tmps,     NULL, &device_param->opencl_d_tmps);         if (CL_rc == -1) return -1;
       CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE,  size_hooks,    NULL, &device_param->opencl_d_hooks);        if (CL_rc == -1) return -1;
 
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_amp_buf,   device_param->size_pws_amp);  if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_comp_buf,  device_param->size_pws_comp); if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_idx,       device_param->size_pws_idx);  if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
     }
 
     /**
diff --git a/src/selftest.c b/src/selftest.c
index befef1988..81d9b415e 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -511,16 +511,16 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
   device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
 
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
 
   if (user_options->slow_candidates == true)
   {
-    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
 
     if (CL_rc == -1) return -1;
   }
@@ -528,19 +528,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   {
     if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
 
       if (CL_rc == -1) return -1;
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs);
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs);
 
       if (CL_rc == -1) return -1;
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
     {
-      CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs);
+      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs);
 
       if (CL_rc == -1) return -1;
     }

From 4df00033d71b52295996c22a82f28a154ec59c22 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 4 May 2019 10:44:03 +0200
Subject: [PATCH 26/73] Prepare CUDA events

---
 include/backend.h |  14 +-
 include/types.h   |   1 -
 src/backend.c     | 734 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 559 insertions(+), 190 deletions(-)

diff --git a/include/backend.h b/include/backend.h
index fede1f92b..057edb87f 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -42,27 +42,33 @@ int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog,
 int hc_cuCtxCreate               (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
 int hc_cuCtxDestroy              (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
 int hc_cuCtxSetCurrent           (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
+int hc_cuCtxSynchronize          (hashcat_ctx_t *hashcat_ctx);
 int hc_cuDeviceGetAttribute      (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
 int hc_cuDeviceGetCount          (hashcat_ctx_t *hashcat_ctx, int *count);
 int hc_cuDeviceGet               (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
 int hc_cuDeviceGetName           (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
 int hc_cuDeviceTotalMem          (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
 int hc_cuDriverGetVersion        (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
+int hc_cuEventCreate             (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags);
+int hc_cuEventDestroy            (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
+int hc_cuEventElapsedTime        (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd);
+int hc_cuEventQuery              (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
+int hc_cuEventRecord             (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream);
+int hc_cuEventSynchronize        (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
+int hc_cuFuncGetAttribute        (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
 int hc_cuInit                    (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
+int hc_cuLaunchKernel            (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
 int hc_cuMemAlloc                (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
 int hc_cuMemcpyDtoD              (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
 int hc_cuMemcpyDtoH              (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
 int hc_cuMemcpyHtoD              (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
 int hc_cuMemFree                 (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
+int hc_cuModuleGetFunction       (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
 int hc_cuModuleLoadDataEx        (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
 int hc_cuModuleUnload            (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
-int hc_cuModuleGetFunction       (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
-int hc_cuFuncGetAttribute        (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
 int hc_cuStreamCreate            (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags);
 int hc_cuStreamDestroy           (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
 int hc_cuStreamSynchronize       (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
-int hc_cuLaunchKernel            (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
-int hc_cuCtxSynchronize          (hashcat_ctx_t *hashcat_ctx);
 
 int hc_clBuildProgram            (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
 int hc_clCreateBuffer            (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
diff --git a/include/types.h b/include/types.h
index 861b27858..5ff50d5d4 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1206,7 +1206,6 @@ typedef struct hc_device_param
 
   double  spin_damp;
 
-
   void   *kernel_params[PARAMCNT];
   void   *kernel_params_mp[PARAMCNT];
   void   *kernel_params_mp_r[PARAMCNT];
diff --git a/src/backend.c b/src/backend.c
index 47c90887c..434abb364 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1599,6 +1599,168 @@ int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
   return 0;
 }
 
+// Wrapper for cuEventCreate(), called through backend_ctx->cuda.
+// phEvent and Flags are handed to the driver call unchanged.
+// Returns 0 on CUDA_SUCCESS; otherwise logs the failure and returns -1.
+int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventCreate (phEvent, Flags);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's error text, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventCreate(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventCreate(): %d", CU_err);
+  }
+
+  return -1;
+}
+
+// Wrapper for cuEventDestroy(), called through backend_ctx->cuda.
+// hEvent is handed to the driver call unchanged.
+// Returns 0 on CUDA_SUCCESS; otherwise logs the failure and returns -1.
+int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventDestroy (hEvent);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's error text, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventDestroy(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventDestroy(): %d", CU_err);
+  }
+
+  return -1;
+}
+
+// Wrapper for cuEventElapsedTime(), called through backend_ctx->cuda.
+// pMilliseconds, hStart and hEnd are handed to the driver call unchanged.
+// Returns 0 on CUDA_SUCCESS; otherwise logs the failure and returns -1.
+int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventElapsedTime (pMilliseconds, hStart, hEnd);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's error text, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventElapsedTime(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventElapsedTime(): %d", CU_err);
+  }
+
+  return -1;
+}
+
+// Wrapper for cuEventQuery(), called through backend_ctx->cuda.
+// Returns 0 once hEvent has completed, 1 while it is still pending
+// (CUDA_ERROR_NOT_READY, not logged), -1 after logging any other error.
+int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventQuery (hEvent);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  if (CU_err == CUDA_ERROR_NOT_READY) return 1;
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventQuery(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventQuery(): %d", CU_err);
+  }
+
+  return -1;
+}
+
+// Wrapper for cuEventRecord(), called through backend_ctx->cuda.
+// hEvent and hStream are handed to the driver call unchanged.
+// Returns 0 on CUDA_SUCCESS; otherwise logs the failure and returns -1.
+int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventRecord (hEvent, hStream);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's error text, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventRecord(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventRecord(): %d", CU_err);
+  }
+
+  return -1;
+}
+
+// Wrapper for cuEventSynchronize(), called through backend_ctx->cuda.
+// hEvent is handed to the driver call unchanged.
+// Returns 0 on CUDA_SUCCESS; otherwise logs the failure and returns -1.
+int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuEventSynchronize (hEvent);
+
+  if (CU_err == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's error text, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (CU_err, &err_str) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuEventSynchronize(): %s", err_str);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuEventSynchronize(): %d", CU_err);
+  }
+
+  return -1;
+}
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -2623,249 +2785,429 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   device_param->kernel_params_buf64[34] = num;
 
-  u64       kernel_threads = 0;
-  cl_kernel kernel = NULL;
+  u64 kernel_threads = 0;
 
   switch (kern_run)
   {
-    case KERN_RUN_1:
-      kernel          = device_param->opencl_kernel1;
-      kernel_threads  = device_param->kernel_wgs1;
-      break;
-    case KERN_RUN_12:
-      kernel          = device_param->opencl_kernel12;
-      kernel_threads  = device_param->kernel_wgs12;
-      break;
-    case KERN_RUN_2:
-      kernel          = device_param->opencl_kernel2;
-      kernel_threads  = device_param->kernel_wgs2;
-      break;
-    case KERN_RUN_23:
-      kernel          = device_param->opencl_kernel23;
-      kernel_threads  = device_param->kernel_wgs23;
-      break;
-    case KERN_RUN_3:
-      kernel          = device_param->opencl_kernel3;
-      kernel_threads  = device_param->kernel_wgs3;
-      break;
-    case KERN_RUN_4:
-      kernel          = device_param->opencl_kernel4;
-      kernel_threads  = device_param->kernel_wgs4;
-      break;
-    case KERN_RUN_INIT2:
-      kernel          = device_param->opencl_kernel_init2;
-      kernel_threads  = device_param->kernel_wgs_init2;
-      break;
-    case KERN_RUN_LOOP2:
-      kernel          = device_param->opencl_kernel_loop2;
-      kernel_threads  = device_param->kernel_wgs_loop2;
-      break;
-    case KERN_RUN_AUX1:
-      kernel          = device_param->opencl_kernel_aux1;
-      kernel_threads  = device_param->kernel_wgs_aux1;
-      break;
-    case KERN_RUN_AUX2:
-      kernel          = device_param->opencl_kernel_aux2;
-      kernel_threads  = device_param->kernel_wgs_aux2;
-      break;
-    case KERN_RUN_AUX3:
-      kernel          = device_param->opencl_kernel_aux3;
-      kernel_threads  = device_param->kernel_wgs_aux3;
-      break;
-    case KERN_RUN_AUX4:
-      kernel          = device_param->opencl_kernel_aux4;
-      kernel_threads  = device_param->kernel_wgs_aux4;
-      break;
+    case KERN_RUN_1:      kernel_threads  = device_param->kernel_wgs1;      break;
+    case KERN_RUN_12:     kernel_threads  = device_param->kernel_wgs12;     break;
+    case KERN_RUN_2:      kernel_threads  = device_param->kernel_wgs2;      break;
+    case KERN_RUN_23:     kernel_threads  = device_param->kernel_wgs23;     break;
+    case KERN_RUN_3:      kernel_threads  = device_param->kernel_wgs3;      break;
+    case KERN_RUN_4:      kernel_threads  = device_param->kernel_wgs4;      break;
+    case KERN_RUN_INIT2:  kernel_threads  = device_param->kernel_wgs_init2; break;
+    case KERN_RUN_LOOP2:  kernel_threads  = device_param->kernel_wgs_loop2; break;
+    case KERN_RUN_AUX1:   kernel_threads  = device_param->kernel_wgs_aux1;  break;
+    case KERN_RUN_AUX2:   kernel_threads  = device_param->kernel_wgs_aux2;  break;
+    case KERN_RUN_AUX3:   kernel_threads  = device_param->kernel_wgs_aux3;  break;
+    case KERN_RUN_AUX4:   kernel_threads  = device_param->kernel_wgs_aux4;  break;
+    // an unknown kern_run would leave kernel_threads at 0 for the CEILDIV/round-up
+    // below and select no kernel handle in either backend branch, so reject it here
     default:
       event_log_error (hashcat_ctx, "Invalid kernel specified.");
       return -1;
   }
 
   kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
 
   // kernel_threads = power_of_two_floor_32 (kernel_threads);
 
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+  if (device_param->is_cuda == true)
+  {
+    num_elements = CEILDIV (num_elements, kernel_threads);
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+  }
 
   int CL_rc;
+  int CU_rc;
 
-  for (u32 i = 0; i <= 23; i++)
+  if (device_param->is_cuda == true)
   {
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_mem), device_param->kernel_params[i]);
+    CUfunction cuda_function = NULL;
 
-    if (CL_rc == -1) return -1;
-  }
-
-  for (u32 i = 24; i <= 33; i++)
-  {
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_uint), device_param->kernel_params[i]);
-
-    if (CL_rc == -1) return -1;
-  }
-
-  for (u32 i = 34; i <= 34; i++)
-  {
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]);
-
-    if (CL_rc == -1) return -1;
-  }
-
-  cl_event event;
-
-  if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
-  {
-    const size_t global_work_size[3] = { num_elements,  32, 1 };
-    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
-
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
-
-    if (CL_rc == -1) return -1;
-  }
-  else
-  {
-    if (kern_run == KERN_RUN_1)
+    if (device_param->is_cuda == true)
     {
-      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+      switch (kern_run)
       {
-        num_elements = CEILDIV (num_elements, device_param->vector_width);
-      }
-    }
-    else if (kern_run == KERN_RUN_2)
-    {
-      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
-      {
-        num_elements = CEILDIV (num_elements, device_param->vector_width);
-      }
-    }
-    else if (kern_run == KERN_RUN_3)
-    {
-      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
-      {
-        num_elements = CEILDIV (num_elements, device_param->vector_width);
+        case KERN_RUN_1:      cuda_function = device_param->cuda_function1;      break;
+        case KERN_RUN_12:     cuda_function = device_param->cuda_function12;     break;
+        case KERN_RUN_2:      cuda_function = device_param->cuda_function2;      break;
+        case KERN_RUN_23:     cuda_function = device_param->cuda_function23;     break;
+        case KERN_RUN_3:      cuda_function = device_param->cuda_function3;      break;
+        case KERN_RUN_4:      cuda_function = device_param->cuda_function4;      break;
+        case KERN_RUN_INIT2:  cuda_function = device_param->cuda_function_init2; break;
+        case KERN_RUN_LOOP2:  cuda_function = device_param->cuda_function_loop2; break;
+        case KERN_RUN_AUX1:   cuda_function = device_param->cuda_function_aux1;  break;
+        case KERN_RUN_AUX2:   cuda_function = device_param->cuda_function_aux2;  break;
+        case KERN_RUN_AUX3:   cuda_function = device_param->cuda_function_aux3;  break;
+        case KERN_RUN_AUX4:   cuda_function = device_param->cuda_function_aux4;  break;
       }
     }
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    CUevent cuda_event;
 
-    const size_t global_work_size[3] = { num_elements,   1, 1 };
-    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+/*
+    if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
+    {
+      const size_t global_work_size[3] = { num_elements,  32, 1 };
+      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);
+      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+
+      if (CL_rc == -1) return -1;
+    }
+    else
+    {
+      if (kern_run == KERN_RUN_1)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+      else if (kern_run == KERN_RUN_2)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+      else if (kern_run == KERN_RUN_3)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+
+      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
+      const size_t global_work_size[3] = { num_elements,   1, 1 };
+      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+
+      if (CL_rc == -1) return -1;
+    }
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
 
     if (CL_rc == -1) return -1;
-  }
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+    // spin damper section
 
-  if (CL_rc == -1) return -1;
+    const u32 iterationm = iteration % EXPECTED_ITERATIONS;
 
-  // spin damper section
+    cl_int opencl_event_status;
 
-  const u32 iterationm = iteration % EXPECTED_ITERATIONS;
+    size_t param_value_size_ret;
 
-  cl_int event_status;
+    CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
 
-  size_t param_value_size_ret;
+    if (CL_rc == -1) return -1;
 
-  CL_rc = hc_clGetEventInfo (hashcat_ctx, event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (event_status), &event_status, &param_value_size_ret);
+    if (device_param->spin_damp > 0)
+    {
+      double spin_total = device_param->spin_damp;
 
-  if (CL_rc == -1) return -1;
+      while (opencl_event_status != CL_COMPLETE)
+      {
+        if (status_ctx->devices_status == STATUS_RUNNING)
+        {
+          switch (kern_run)
+          {
+            case KERN_RUN_1:      if (device_param->exec_us_prev1[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_2:      if (device_param->exec_us_prev2[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_3:      if (device_param->exec_us_prev3[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_4:      if (device_param->exec_us_prev4[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_INIT2:  if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
+            case KERN_RUN_LOOP2:  if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
+            case KERN_RUN_AUX1:   if (device_param->exec_us_prev_aux1[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX2:   if (device_param->exec_us_prev_aux2[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX3:   if (device_param->exec_us_prev_aux3[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX4:   if (device_param->exec_us_prev_aux4[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm]  * device_param->spin_damp)); break;
+          }
+        }
+        else
+        {
+          // we were told to be nice
 
-  if (device_param->spin_damp > 0)
-  {
-    double spin_total = device_param->spin_damp;
+          sleep (0);
+        }
 
-    while (event_status != CL_COMPLETE)
+        CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
+
+        if (CL_rc == -1) return -1;
+
+        spin_total += device_param->spin_damp;
+
+        if (spin_total > 1) break;
+      }
+    }
+
+    CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event);
+
+    if (CL_rc == -1) return -1;
+
+    cl_ulong time_start;
+    cl_ulong time_end;
+
+    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL); if (CL_rc == -1) return -1;
+
+    const double exec_us = (double) (time_end - time_start) / 1000;
+
+    if (device_param->spin_damp > 0)
     {
       if (status_ctx->devices_status == STATUS_RUNNING)
       {
         switch (kern_run)
         {
-          case KERN_RUN_1:      if (device_param->exec_us_prev1[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm]      * device_param->spin_damp)); break;
-          case KERN_RUN_2:      if (device_param->exec_us_prev2[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm]      * device_param->spin_damp)); break;
-          case KERN_RUN_3:      if (device_param->exec_us_prev3[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm]      * device_param->spin_damp)); break;
-          case KERN_RUN_4:      if (device_param->exec_us_prev4[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm]      * device_param->spin_damp)); break;
-          case KERN_RUN_INIT2:  if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
-          case KERN_RUN_LOOP2:  if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
-          case KERN_RUN_AUX1:   if (device_param->exec_us_prev_aux1[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm]  * device_param->spin_damp)); break;
-          case KERN_RUN_AUX2:   if (device_param->exec_us_prev_aux2[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm]  * device_param->spin_damp)); break;
-          case KERN_RUN_AUX3:   if (device_param->exec_us_prev_aux3[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm]  * device_param->spin_damp)); break;
-          case KERN_RUN_AUX4:   if (device_param->exec_us_prev_aux4[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm]  * device_param->spin_damp)); break;
+          case KERN_RUN_1:      device_param->exec_us_prev1[iterationm]      = exec_us; break;
+          case KERN_RUN_2:      device_param->exec_us_prev2[iterationm]      = exec_us; break;
+          case KERN_RUN_3:      device_param->exec_us_prev3[iterationm]      = exec_us; break;
+          case KERN_RUN_4:      device_param->exec_us_prev4[iterationm]      = exec_us; break;
+          case KERN_RUN_INIT2:  device_param->exec_us_prev_init2[iterationm] = exec_us; break;
+          case KERN_RUN_LOOP2:  device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
+          case KERN_RUN_AUX1:   device_param->exec_us_prev_aux1[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX2:   device_param->exec_us_prev_aux2[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX3:   device_param->exec_us_prev_aux3[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX4:   device_param->exec_us_prev_aux4[iterationm]  = exec_us; break;
         }
       }
-      else
-      {
-        // we were told to be nice
+    }
 
-        sleep (0);
+    if (event_update)
+    {
+      u32 exec_pos = device_param->exec_pos;
+
+      device_param->exec_msec[exec_pos] = exec_us / 1000;
+
+      exec_pos++;
+
+      if (exec_pos == EXEC_CACHE)
+      {
+        exec_pos = 0;
       }
 
-      CL_rc = hc_clGetEventInfo (hashcat_ctx, event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (event_status), &event_status, &param_value_size_ret);
-
-      if (CL_rc == -1) return -1;
-
-      spin_total += device_param->spin_damp;
-
-      if (spin_total > 1) break;
+      device_param->exec_pos = exec_pos;
     }
+
+    CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+*/
   }
 
-  CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &event);
-
-  if (CL_rc == -1) return -1;
-
-  cl_ulong time_start;
-  cl_ulong time_end;
-
-  CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
-  CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL); if (CL_rc == -1) return -1;
-
-  const double exec_us = (double) (time_end - time_start) / 1000;
-
-  if (device_param->spin_damp > 0)
+  if (device_param->is_opencl == true)
   {
-    if (status_ctx->devices_status == STATUS_RUNNING)
+    cl_kernel opencl_kernel = NULL;
+
+    if (device_param->is_opencl == true)
     {
       switch (kern_run)
       {
-        case KERN_RUN_1:      device_param->exec_us_prev1[iterationm]      = exec_us; break;
-        case KERN_RUN_2:      device_param->exec_us_prev2[iterationm]      = exec_us; break;
-        case KERN_RUN_3:      device_param->exec_us_prev3[iterationm]      = exec_us; break;
-        case KERN_RUN_4:      device_param->exec_us_prev4[iterationm]      = exec_us; break;
-        case KERN_RUN_INIT2:  device_param->exec_us_prev_init2[iterationm] = exec_us; break;
-        case KERN_RUN_LOOP2:  device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
-        case KERN_RUN_AUX1:   device_param->exec_us_prev_aux1[iterationm]  = exec_us; break;
-        case KERN_RUN_AUX2:   device_param->exec_us_prev_aux2[iterationm]  = exec_us; break;
-        case KERN_RUN_AUX3:   device_param->exec_us_prev_aux3[iterationm]  = exec_us; break;
-        case KERN_RUN_AUX4:   device_param->exec_us_prev_aux4[iterationm]  = exec_us; break;
+        case KERN_RUN_1:      opencl_kernel = device_param->opencl_kernel1;      break;
+        case KERN_RUN_12:     opencl_kernel = device_param->opencl_kernel12;     break;
+        case KERN_RUN_2:      opencl_kernel = device_param->opencl_kernel2;      break;
+        case KERN_RUN_23:     opencl_kernel = device_param->opencl_kernel23;     break;
+        case KERN_RUN_3:      opencl_kernel = device_param->opencl_kernel3;      break;
+        case KERN_RUN_4:      opencl_kernel = device_param->opencl_kernel4;      break;
+        case KERN_RUN_INIT2:  opencl_kernel = device_param->opencl_kernel_init2; break;
+        case KERN_RUN_LOOP2:  opencl_kernel = device_param->opencl_kernel_loop2; break;
+        case KERN_RUN_AUX1:   opencl_kernel = device_param->opencl_kernel_aux1;  break;
+        case KERN_RUN_AUX2:   opencl_kernel = device_param->opencl_kernel_aux2;  break;
+        case KERN_RUN_AUX3:   opencl_kernel = device_param->opencl_kernel_aux3;  break;
+        case KERN_RUN_AUX4:   opencl_kernel = device_param->opencl_kernel_aux4;  break;
       }
     }
-  }
 
-  if (event_update)
-  {
-    u32 exec_pos = device_param->exec_pos;
-
-    device_param->exec_msec[exec_pos] = exec_us / 1000;
-
-    exec_pos++;
-
-    if (exec_pos == EXEC_CACHE)
+    for (u32 i = 0; i <= 23; i++)
     {
-      exec_pos = 0;
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_mem), device_param->kernel_params[i]);
+
+      if (CL_rc == -1) return -1;
     }
 
-    device_param->exec_pos = exec_pos;
+    for (u32 i = 24; i <= 33; i++)
+    {
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_uint), device_param->kernel_params[i]);
+
+      if (CL_rc == -1) return -1;
+    }
+
+    for (u32 i = 34; i <= 34; i++)
+    {
+      CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]);
+
+      if (CL_rc == -1) return -1;
+    }
+
+    cl_event opencl_event;
+
+    if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
+    {
+      const size_t global_work_size[3] = { num_elements,  32, 1 };
+      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+
+      if (CL_rc == -1) return -1;
+    }
+    else
+    {
+      if (kern_run == KERN_RUN_1)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+      else if (kern_run == KERN_RUN_2)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+      else if (kern_run == KERN_RUN_3)
+      {
+        if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
+        {
+          num_elements = CEILDIV (num_elements, device_param->vector_width);
+        }
+      }
+
+      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
+      const size_t global_work_size[3] = { num_elements,   1, 1 };
+      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+
+      if (CL_rc == -1) return -1;
+    }
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    // spin damper section
+
+    const u32 iterationm = iteration % EXPECTED_ITERATIONS;
+
+    cl_int opencl_event_status;
+
+    size_t param_value_size_ret;
+
+    CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
+
+    if (CL_rc == -1) return -1;
+
+    if (device_param->spin_damp > 0)
+    {
+      double spin_total = device_param->spin_damp;
+
+      while (opencl_event_status != CL_COMPLETE)
+      {
+        if (status_ctx->devices_status == STATUS_RUNNING)
+        {
+          switch (kern_run)
+          {
+            case KERN_RUN_1:      if (device_param->exec_us_prev1[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_2:      if (device_param->exec_us_prev2[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_3:      if (device_param->exec_us_prev3[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_4:      if (device_param->exec_us_prev4[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm]      * device_param->spin_damp)); break;
+            case KERN_RUN_INIT2:  if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
+            case KERN_RUN_LOOP2:  if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
+            case KERN_RUN_AUX1:   if (device_param->exec_us_prev_aux1[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX2:   if (device_param->exec_us_prev_aux2[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX3:   if (device_param->exec_us_prev_aux3[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm]  * device_param->spin_damp)); break;
+            case KERN_RUN_AUX4:   if (device_param->exec_us_prev_aux4[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm]  * device_param->spin_damp)); break;
+          }
+        }
+        else
+        {
+          // we were told to be nice
+
+          sleep (0);
+        }
+
+        CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
+
+        if (CL_rc == -1) return -1;
+
+        spin_total += device_param->spin_damp;
+
+        if (spin_total > 1) break;
+      }
+    }
+
+    CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event);
+
+    if (CL_rc == -1) return -1;
+
+    cl_ulong time_start;
+    cl_ulong time_end;
+
+    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
+    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL); if (CL_rc == -1) return -1;
+
+    const double exec_us = (double) (time_end - time_start) / 1000;
+
+    if (device_param->spin_damp > 0)
+    {
+      if (status_ctx->devices_status == STATUS_RUNNING)
+      {
+        switch (kern_run)
+        {
+          case KERN_RUN_1:      device_param->exec_us_prev1[iterationm]      = exec_us; break;
+          case KERN_RUN_2:      device_param->exec_us_prev2[iterationm]      = exec_us; break;
+          case KERN_RUN_3:      device_param->exec_us_prev3[iterationm]      = exec_us; break;
+          case KERN_RUN_4:      device_param->exec_us_prev4[iterationm]      = exec_us; break;
+          case KERN_RUN_INIT2:  device_param->exec_us_prev_init2[iterationm] = exec_us; break;
+          case KERN_RUN_LOOP2:  device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
+          case KERN_RUN_AUX1:   device_param->exec_us_prev_aux1[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX2:   device_param->exec_us_prev_aux2[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX3:   device_param->exec_us_prev_aux3[iterationm]  = exec_us; break;
+          case KERN_RUN_AUX4:   device_param->exec_us_prev_aux4[iterationm]  = exec_us; break;
+        }
+      }
+    }
+
+    if (event_update)
+    {
+      u32 exec_pos = device_param->exec_pos;
+
+      device_param->exec_msec[exec_pos] = exec_us / 1000;
+
+      exec_pos++;
+
+      if (exec_pos == EXEC_CACHE)
+      {
+        exec_pos = 0;
+      }
+
+      device_param->exec_pos = exec_pos;
+    }
+
+    CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
   }
 
-  CL_rc = hc_clReleaseEvent (hashcat_ctx, event);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
   return 0;
 }
 
@@ -9705,6 +10042,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
+      device_param->kernel_params[ 0] = &device_param->cuda_d_pws_buf;
+      device_param->kernel_params[ 4] = &device_param->cuda_d_tmps;
+      device_param->kernel_params[ 5] = &device_param->cuda_d_hooks;
     }
 
     if (device_param->is_opencl == true)
@@ -9729,6 +10069,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           if (device_param->is_cuda == true)
           {
+            device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->cuda_d_pws_buf
+                                              : &device_param->cuda_d_pws_amp_buf;
+
+            //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
           }
 
           if (device_param->is_opencl == true)
@@ -9746,6 +10091,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (device_param->is_cuda == true)
         {
+          device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                              ? &device_param->cuda_d_pws_buf
+                                              : &device_param->cuda_d_pws_amp_buf;
+
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
         }
 
         if (device_param->is_opencl == true)
@@ -9766,6 +10116,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (device_param->is_cuda == true)
         {
+          device_param->kernel_params_amp[0] = &device_param->cuda_d_pws_buf;
+          device_param->kernel_params_amp[1] = &device_param->cuda_d_pws_amp_buf;
+
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
+          //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
         }
 
         if (device_param->is_opencl == true)
@@ -9781,6 +10136,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
+      device_param->kernel_params_decompress[0] = &device_param->cuda_d_pws_idx;
+      device_param->kernel_params_decompress[1] = &device_param->cuda_d_pws_comp_buf;
+      device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+                                                ? &device_param->cuda_d_pws_buf
+                                                : &device_param->cuda_d_pws_amp_buf;
+
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
+      //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
     }
 
     if (device_param->is_opencl == true)

From ec9925f3b1b1cf5f6b225bebff9c1716ce47e6b9 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 4 May 2019 21:52:00 +0200
Subject: [PATCH 27/73] Warnings self-check and autotune with CUDA

---
 OpenCL/inc_platform.cl |   2 +-
 include/ext_cuda.h     |  18 +
 include/types.h        |   4 +
 src/autotune.c         |  86 ++++-
 src/backend.c          | 773 +++++++++++++++++++++++------------------
 src/selftest.c         | 297 +++++++++++++---
 6 files changed, 771 insertions(+), 409 deletions(-)

diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index ceb12a4f1..1dc643173 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -30,7 +30,7 @@ DECLSPEC u32 atomic_or (u32 *p, u32 val)
 
 DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
 {
-  return blockDim.x * blockIdx.x + threadIdx.x;
+  return (blockIdx.x * blockDim.x) + threadIdx.x;
 }
 
 DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
index f48cca490..eb8967f09 100644
--- a/include/ext_cuda.h
+++ b/include/ext_cuda.h
@@ -933,6 +933,24 @@ typedef enum CUctx_flags_enum {
     CU_CTX_FLAGS_MASK          = 0x1f
 } CUctx_flags;
 
+/**
+ * Stream creation flags
+ */
+typedef enum CUstream_flags_enum {
+    CU_STREAM_DEFAULT      = 0x0, /**< Default stream flag */
+    CU_STREAM_NON_BLOCKING = 0x1  /**< Stream does not synchronize with stream 0 (the NULL stream) */
+} CUstream_flags;
+
+/**
+ * Event creation flags
+ */
+typedef enum CUevent_flags_enum {
+    CU_EVENT_DEFAULT        = 0x0, /**< Default event flag */
+    CU_EVENT_BLOCKING_SYNC  = 0x1, /**< Event uses blocking synchronization */
+    CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */
+    CU_EVENT_INTERPROCESS   = 0x4  /**< Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set */
+} CUevent_flags;
+
 #ifdef _WIN32
 #define CUDAAPI __stdcall
 #else
diff --git a/include/types.h b/include/types.h
index 5ff50d5d4..76f8910a7 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1248,6 +1248,10 @@ typedef struct hc_device_param
 
   CUdevice          cuda_device;
   CUcontext         cuda_context;
+  CUstream          cuda_stream;
+
+  CUevent           cuda_event1;
+  CUevent           cuda_event2;
 
   CUmodule          cuda_module;
   CUmodule          cuda_module_mp;
diff --git a/src/autotune.c b/src/autotune.c
index 80d438bd7..d0b99f59a 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -104,10 +104,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max;
 
   int CL_rc;
+  int CU_rc;
 
-  CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
+  if (device_param->is_cuda == true)
+  {
+    CU_rc = run_cuda_kernel_atinit (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, kernel_power_max);
 
-  if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
+
+    if (CL_rc == -1) return -1;
+  }
 
   if (user_options->slow_candidates == true)
   {
@@ -118,9 +129,19 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     {
       if (straight_ctx->kernel_rules_cnt > 1)
       {
-        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t));
 
-        if (CL_rc == -1) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
     }
   }
@@ -135,7 +156,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     if (exec_msec > 2000)
     {
-      event_log_error (hashcat_ctx, "OpenCL kernel minimum runtime larger than default TDR");
+      event_log_error (hashcat_ctx, "Kernel minimum runtime larger than default TDR");
 
       return -1;
     }
@@ -215,6 +236,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
 
+printf ("%f\n", exec_msec_pre_final);
+
   const u32 exec_left = (const u32) (target_msec / exec_msec_pre_final);
 
   const u32 accel_left = kernel_accel_max / kernel_accel;
@@ -228,25 +251,51 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     kernel_accel *= exec_accel_min;
   }
 
-  // reset them fake words
+  if (device_param->is_cuda == true)
+  {
+    // reset them fake words
 
-  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws);
+    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 0, device_param->size_pws);
 
-  if (CL_rc == -1) return -1;
+    if (CL_rc == -1) return -1;
 
-  // reset other buffers in case autotune cracked something
+    // reset other buffers in case autotune cracked something
 
-  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains);
+    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, 0, device_param->size_plains);
 
-  if (CL_rc == -1) return -1;
+    if (CL_rc == -1) return -1;
 
-  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown);
+    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, 0, device_param->size_shown);
 
-  if (CL_rc == -1) return -1;
+    if (CL_rc == -1) return -1;
 
-  CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results);
+    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_result, 0, device_param->size_results);
 
-  if (CL_rc == -1) return -1;
+    if (CL_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    // reset them fake words
+
+    CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws);
+
+    if (CL_rc == -1) return -1;
+
+    // reset other buffers in case autotune cracked something
+
+    CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results);
+
+    if (CL_rc == -1) return -1;
+  }
 
   // reset timer
 
@@ -293,6 +342,13 @@ HC_API_CALL void *thread_autotune (void *p)
 
   if (device_param->skipped_warning == true) return NULL;
 
+  if (device_param->is_cuda == true)
+  {
+    const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
+
+    if (rc_cuCtxSetCurrent == -1) return NULL;
+  }
+
   const int rc_autotune = autotune (hashcat_ctx, device_param);
 
   if (rc_autotune == -1)
diff --git a/src/backend.c b/src/backend.c
index 434abb364..7759c41fa 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2775,16 +2775,199 @@ void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 p
   hcfree (tmp_pws_idx);
 }
 
+int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num)
+{ // Launches device_param->cuda_function_atinit on buf for num elements, then synchronizes; returns 0, or -1 if a wrapper fails.
+  u64 num_elements = num;
+  // Slot 0 takes the address of the by-value parameter buf, so the stored pointer is only meaningful during this call.
+  device_param->kernel_params_atinit[0]       = (void *) &buf;
+  device_param->kernel_params_atinit_buf64[1] = num_elements;
+  // NOTE(review): kernel_params_atinit[1] presumably points at the _buf64[1] value written above - confirm at setup.
+  const u64 kernel_threads = device_param->kernel_wgs_atinit;
+  // NOTE(review): CEILDIV presumably rounds up, turning the element count into a count of kernel_threads-sized blocks - confirm.
+  num_elements = CEILDIV (num_elements, kernel_threads);
+
+  CUfunction function = device_param->cuda_function_atinit;
+  // NOTE(review): argument order presumably mirrors cuLaunchKernel (grid x/y/z, block x/y/z, shared bytes, stream, params, extra) - confirm.
+  const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_atinit, NULL);
+
+  if (rc_cuLaunchKernel == -1) return -1;
+  // The launch is followed by a context-wide synchronize before control returns to the caller.
+  const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+
+  if (rc_cuCtxSynchronize == -1) return -1;
+
+  return 0;
+}
+
+int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size)
+{ // Sets size bytes of buf from value: whole 16-byte units via the memset function, the tail via a host copy; returns 0 or -1.
+  const u64 num16d = size / 16;
+  const u64 num16m = size % 16;
+  // num16d = number of whole 16-byte units (kernel path), num16m = leftover bytes (host copy path).
+  if (num16d)
+  {
+    device_param->kernel_params_memset[0]       = (void *) &buf;
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf64[2] = num16d;
+    // Slot 0 holds the address of the by-value parameter buf, so it is only meaningful during this call.
+    const u64 kernel_threads = device_param->kernel_wgs_memset;
+
+    u64 num_elements = num16d;
+    // NOTE(review): CEILDIV presumably rounds up, so num_elements becomes a block count rather than a thread count - confirm.
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    CUfunction function = device_param->cuda_function_memset;
+
+    // Unlike run_opencl_kernel_memset there are no per-argument set calls here:
+    // the whole kernel_params_memset array is handed to the launch below.
+    // NOTE(review): slots 1 and 2 presumably point at the _buf32[1] and
+    // _buf64[2] values written above - confirm where the array is set up.
+    // NOTE(review): launch arguments presumably mirror cuLaunchKernel, i.e.
+    // num_elements blocks of kernel_threads threads in one dimension - confirm.
+
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL);
+
+    if (rc_cuLaunchKernel == -1) return -1;
+    // The launch is followed by a context-wide synchronize before the tail is handled.
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+
+    if (rc_cuCtxSynchronize == -1) return -1;
+  }
+
+  if (num16m)
+  {
+    u32 tmp[4];
+    // Host-side pattern of four copies of value; only the first num16m bytes (fewer than 16) are copied out.
+    tmp[0] = value;
+    tmp[1] = value;
+    tmp[2] = value;
+    tmp[3] = value;
+
+    // Apparently we are allowed to offset a CUdeviceptr with plain arithmetic: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/
+
+    const int rc_cuMemcpyHtoD = hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m);
+
+    if (rc_cuMemcpyHtoD == -1) return -1;
+  }
+
+  return 0;
+}
+
+int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size)
+{ // Convenience wrapper: run_cuda_kernel_memset with value 0 over size bytes of buf; returns that call's result unchanged.
+  return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
+}
+
+int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
+{ // Enqueues device_param->opencl_kernel_atinit on buf for num elements, then flushes and finishes the queue; returns 0 or -1.
+  u64 num_elements = num;
+  // The kernel argument keeps the original num; only the launch size below is rounded.
+  device_param->kernel_params_atinit_buf64[1] = num_elements;
+
+  const u64 kernel_threads = device_param->kernel_wgs_atinit;
+  // NOTE(review): presumably rounds the global size up to a multiple of the work-group size - confirm helper semantics.
+  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
+  cl_kernel kernel = device_param->opencl_kernel_atinit;
+  // One-dimensional launch: only the first entry of each size array varies.
+  const size_t global_work_size[3] = { num_elements,    1, 1 };
+  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+
+  int CL_rc;
+  // Argument 0 is bound to the buffer handle passed in by the caller.
+  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf);
+
+  if (CL_rc == -1) return -1;
+  // NOTE(review): kernel_params_atinit[1] presumably points at the _buf64[1] value written above - confirm at setup.
+  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]);
+
+  if (CL_rc == -1) return -1;
+
+  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+  if (CL_rc == -1) return -1;
+  // Flush followed by finish on the same queue, so the function does not return while the kernel is still queued.
+  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+  if (CL_rc == -1) return -1;
+
+  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+  if (CL_rc == -1) return -1;
+
+  return 0;
+}
+
+int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
+{ // Sets size bytes of buf from value: whole 16-byte units via the memset kernel, the tail via a buffer write; returns 0 or -1.
+  const u64 num16d = size / 16;
+  const u64 num16m = size % 16;
+  // num16d = number of whole 16-byte units (kernel path), num16m = leftover bytes (buffer write path).
+  if (num16d)
+  {
+    device_param->kernel_params_memset_buf32[1] = value;
+    device_param->kernel_params_memset_buf64[2] = num16d;
+    // The kernel arguments keep the exact unit count; only the launch size below is rounded.
+    const u64 kernel_threads = device_param->kernel_wgs_memset;
+
+    u64 num_elements = num16d;
+    // NOTE(review): presumably rounds the global size up to a multiple of the work-group size - confirm helper semantics.
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
+    cl_kernel kernel = device_param->opencl_kernel_memset;
+
+    int CL_rc;
+    // NOTE(review): kernel_params_memset[1] and [2] presumably point at the _buf32[1] / _buf64[2] values set above - confirm.
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem),   (void *) &buf);                         if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
+    // One-dimensional launch: only the first entry of each size array varies.
+    const size_t global_work_size[3] = { num_elements,   1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+    // Flush followed by finish on the same queue before the tail bytes are written.
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+  }
+
+  if (num16m)
+  {
+    u32 tmp[4];
+    // Host-side pattern of four copies of value; only the first num16m bytes (fewer than 16) are written out.
+    tmp[0] = value;
+    tmp[1] = value;
+    tmp[2] = value;
+    tmp[3] = value;
+
+    int CL_rc;
+    // Writes the tail at byte offset num16d * 16; NOTE(review): CL_TRUE is presumably the blocking-write flag - confirm.
+    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+  }
+
+  return 0;
+}
+
+int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)
+{ // Convenience wrapper: run_opencl_kernel_memset with value 0 over size bytes of buf; returns that call's result unchanged.
+  return run_opencl_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
+}
+
 int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num, const u32 event_update, const u32 iteration)
 {
   const hashconfig_t   *hashconfig   = hashcat_ctx->hashconfig;
   const status_ctx_t   *status_ctx   = hashcat_ctx->status_ctx;
   const user_options_t *user_options = hashcat_ctx->user_options;
 
-  u64 num_elements = num;
-
-  device_param->kernel_params_buf64[34] = num;
-
   u64 kernel_threads = 0;
 
   switch (kern_run)
@@ -2805,20 +2988,9 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
 
-  // kernel_threads = power_of_two_floor_32 (kernel_threads);
+  device_param->kernel_params_buf64[34] = num;
 
-  if (device_param->is_cuda == true)
-  {
-    num_elements = CEILDIV (num_elements, kernel_threads);
-  }
-
-  if (device_param->is_opencl == true)
-  {
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-  }
-
-  int CL_rc;
-  int CU_rc;
+  u64 num_elements = num;
 
   if (device_param->is_cuda == true)
   {
@@ -2843,17 +3015,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    CUevent cuda_event;
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
-/*
     if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
     {
-      const size_t global_work_size[3] = { num_elements,  32, 1 };
-      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+      const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
 
-      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+      if (rc_cuEventRecord1 == -1) return -1;
 
-      if (CL_rc == -1) return -1;
+      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+
+      if (rc_cuLaunchKernel == -1) return -1;
+
+      const int rc_cuEventRecord2 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream);
+
+      if (rc_cuEventRecord2 == -1) return -1;
     }
     else
     {
@@ -2879,108 +3055,40 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
         }
       }
 
-      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+      num_elements = CEILDIV (num_elements, kernel_threads);
 
-      const size_t global_work_size[3] = { num_elements,   1, 1 };
-      const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+      const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
 
-      CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
+      if (rc_cuEventRecord1 == -1) return -1;
 
-      if (CL_rc == -1) return -1;
+      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+
+      if (rc_cuLaunchKernel == -1) return -1;
+
+      const int rc_cuEventRecord2 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream);
+
+      if (rc_cuEventRecord2 == -1) return -1;
     }
 
-    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+    const int rc_cuEventSynchronize = hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event2);
 
-    if (CL_rc == -1) return -1;
+    if (rc_cuEventSynchronize == -1) return -1;
 
-    // spin damper section
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    const u32 iterationm = iteration % EXPECTED_ITERATIONS;
+    if (rc_cuStreamSynchronize == -1) return -1;
 
-    cl_int opencl_event_status;
+    float exec_ms;
 
-    size_t param_value_size_ret;
+    const int rc_cuEventElapsedTime = hc_cuEventElapsedTime (hashcat_ctx, &exec_ms, device_param->cuda_event1, device_param->cuda_event2);
 
-    CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
-
-    if (CL_rc == -1) return -1;
-
-    if (device_param->spin_damp > 0)
-    {
-      double spin_total = device_param->spin_damp;
-
-      while (opencl_event_status != CL_COMPLETE)
-      {
-        if (status_ctx->devices_status == STATUS_RUNNING)
-        {
-          switch (kern_run)
-          {
-            case KERN_RUN_1:      if (device_param->exec_us_prev1[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm]      * device_param->spin_damp)); break;
-            case KERN_RUN_2:      if (device_param->exec_us_prev2[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm]      * device_param->spin_damp)); break;
-            case KERN_RUN_3:      if (device_param->exec_us_prev3[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm]      * device_param->spin_damp)); break;
-            case KERN_RUN_4:      if (device_param->exec_us_prev4[iterationm]      > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm]      * device_param->spin_damp)); break;
-            case KERN_RUN_INIT2:  if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
-            case KERN_RUN_LOOP2:  if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
-            case KERN_RUN_AUX1:   if (device_param->exec_us_prev_aux1[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm]  * device_param->spin_damp)); break;
-            case KERN_RUN_AUX2:   if (device_param->exec_us_prev_aux2[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm]  * device_param->spin_damp)); break;
-            case KERN_RUN_AUX3:   if (device_param->exec_us_prev_aux3[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm]  * device_param->spin_damp)); break;
-            case KERN_RUN_AUX4:   if (device_param->exec_us_prev_aux4[iterationm]  > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm]  * device_param->spin_damp)); break;
-          }
-        }
-        else
-        {
-          // we were told to be nice
-
-          sleep (0);
-        }
-
-        CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
-
-        if (CL_rc == -1) return -1;
-
-        spin_total += device_param->spin_damp;
-
-        if (spin_total > 1) break;
-      }
-    }
-
-    CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event);
-
-    if (CL_rc == -1) return -1;
-
-    cl_ulong time_start;
-    cl_ulong time_end;
-
-    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
-    CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL); if (CL_rc == -1) return -1;
-
-    const double exec_us = (double) (time_end - time_start) / 1000;
-
-    if (device_param->spin_damp > 0)
-    {
-      if (status_ctx->devices_status == STATUS_RUNNING)
-      {
-        switch (kern_run)
-        {
-          case KERN_RUN_1:      device_param->exec_us_prev1[iterationm]      = exec_us; break;
-          case KERN_RUN_2:      device_param->exec_us_prev2[iterationm]      = exec_us; break;
-          case KERN_RUN_3:      device_param->exec_us_prev3[iterationm]      = exec_us; break;
-          case KERN_RUN_4:      device_param->exec_us_prev4[iterationm]      = exec_us; break;
-          case KERN_RUN_INIT2:  device_param->exec_us_prev_init2[iterationm] = exec_us; break;
-          case KERN_RUN_LOOP2:  device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
-          case KERN_RUN_AUX1:   device_param->exec_us_prev_aux1[iterationm]  = exec_us; break;
-          case KERN_RUN_AUX2:   device_param->exec_us_prev_aux2[iterationm]  = exec_us; break;
-          case KERN_RUN_AUX3:   device_param->exec_us_prev_aux3[iterationm]  = exec_us; break;
-          case KERN_RUN_AUX4:   device_param->exec_us_prev_aux4[iterationm]  = exec_us; break;
-        }
-      }
-    }
+    if (rc_cuEventElapsedTime == -1) return -1;
 
     if (event_update)
     {
       u32 exec_pos = device_param->exec_pos;
 
-      device_param->exec_msec[exec_pos] = exec_us / 1000;
+      device_param->exec_msec[exec_pos] = exec_ms;
 
       exec_pos++;
 
@@ -2991,19 +3099,12 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
       device_param->exec_pos = exec_pos;
     }
-
-    CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-    if (CL_rc == -1) return -1;
-*/
   }
 
   if (device_param->is_opencl == true)
   {
+    int CL_rc;
+
     cl_kernel opencl_kernel = NULL;
 
     if (device_param->is_opencl == true)
@@ -3046,6 +3147,8 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       if (CL_rc == -1) return -1;
     }
 
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
     cl_event opencl_event;
 
     if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
@@ -3208,6 +3311,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
 int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num)
 {
+puts ("run_kernel_mp");
   u64 num_elements = num;
 
   switch (kern_run)
@@ -3289,6 +3393,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
 int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
+puts ("run_kernel_tm");
   const u64 num_elements = 1024; // fixed
 
   const u64 kernel_threads = MIN (num_elements, device_param->kernel_wgs_tm);
@@ -3317,6 +3422,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
 int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
 {
+puts ("run_kernel_amp");
   u64 num_elements = num;
 
   device_param->kernel_params_amp_buf64[6] = num_elements;
@@ -3353,6 +3459,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
 int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
 {
+puts ("run_kernel_decompress");
   u64 num_elements = num;
 
   device_param->kernel_params_decompress_buf64[3] = num_elements;
@@ -3387,171 +3494,9 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
   return 0;
 }
 
-int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
-{
-  u64 num_elements = num;
-
-  device_param->kernel_params_atinit_buf64[1] = num_elements;
-
-  const u64 kernel_threads = device_param->kernel_wgs_atinit;
-
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-
-  cl_kernel kernel = device_param->opencl_kernel_atinit;
-
-  const size_t global_work_size[3] = { num_elements,    1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
-
-  int CL_rc;
-
-  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
-  return 0;
-}
-
-int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size)
-{
-  const u64 num16d = size / 16;
-  const u64 num16m = size % 16;
-
-  if (num16d)
-  {
-    device_param->kernel_params_memset[0]       = (void *) &buf;
-    device_param->kernel_params_memset_buf32[1] = value;
-    device_param->kernel_params_memset_buf64[2] = num16d;
-
-    const u64 kernel_threads = device_param->kernel_wgs_memset;
-
-    u64 num_elements = num16d;
-
-    num_elements = CEILDIV (num_elements, kernel_threads);
-
-    CUfunction function = device_param->cuda_function_memset;
-
-    //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem),   (void *) &buf);                         if (CU_rc == -1) return -1;
-    //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CU_rc == -1) return -1;
-    //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CU_rc == -1) return -1;
-
-    //const size_t global_work_size[3] = { num_elements,   1, 1 };
-    //const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
-
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL);
-
-    if (rc_cuLaunchKernel == -1) return -1;
-
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
-
-    if (rc_cuCtxSynchronize == -1) return -1;
-  }
-
-  if (num16m)
-  {
-    u32 tmp[4];
-
-    tmp[0] = value;
-    tmp[1] = value;
-    tmp[2] = value;
-    tmp[3] = value;
-
-    // Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/
-
-    const int rc_cuMemcpyHtoD = hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m);
-
-    if (rc_cuMemcpyHtoD == -1) return -1;
-  }
-
-  return 0;
-}
-
-int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size)
-{
-  return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
-}
-
-int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
-{
-  const u64 num16d = size / 16;
-  const u64 num16m = size % 16;
-
-  if (num16d)
-  {
-    device_param->kernel_params_memset_buf32[1] = value;
-    device_param->kernel_params_memset_buf64[2] = num16d;
-
-    const u64 kernel_threads = device_param->kernel_wgs_memset;
-
-    u64 num_elements = num16d;
-
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-
-    cl_kernel kernel = device_param->opencl_kernel_memset;
-
-    int CL_rc;
-
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem),   (void *) &buf);                         if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint),  device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
-    CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
-
-    const size_t global_work_size[3] = { num_elements,   1, 1 };
-    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
-
-    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
-
-    if (CL_rc == -1) return -1;
-
-    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-    if (CL_rc == -1) return -1;
-  }
-
-  if (num16m)
-  {
-    u32 tmp[4];
-
-    tmp[0] = value;
-    tmp[1] = value;
-    tmp[2] = value;
-    tmp[3] = value;
-
-    int CL_rc;
-
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);
-
-    if (CL_rc == -1) return -1;
-  }
-
-  return 0;
-}
-
-int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)
-{
-  return run_opencl_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
-}
-
 int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt)
 {
+puts ("run_copy");
   combinator_ctx_t     *combinator_ctx      = hashcat_ctx->combinator_ctx;
   hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   user_options_t       *user_options        = hashcat_ctx->user_options;
@@ -3755,6 +3700,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
 int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt)
 {
+puts ("run_cracker");
   combinator_ctx_t      *combinator_ctx     = hashcat_ctx->combinator_ctx;
   hashconfig_t          *hashconfig         = hashcat_ctx->hashconfig;
   hashes_t              *hashes             = hashcat_ctx->hashes;
@@ -6799,6 +6745,32 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (CL_rc == -1) return -1;
     }
 
+    /**
+     * create stream for CUDA devices
+     */
+
+    if (device_param->is_cuda == true)
+    {
+      const int rc_cuStreamCreate = hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT);
+
+      if (rc_cuStreamCreate == -1) return -1;
+    }
+
+    /**
+     * create events for CUDA devices
+     */
+
+    if (device_param->is_cuda == true)
+    {
+      const int rc_cuEventCreate1 = hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_DEFAULT);
+
+      if (rc_cuEventCreate1 == -1) return -1;
+
+      const int rc_cuEventCreate2 = hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_DEFAULT);
+
+      if (rc_cuEventCreate2 == -1) return -1;
+    }
+
     /**
      * create input buffers on device : calculate size of fixed memory buffers
      */
@@ -10199,6 +10171,121 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (device_param->brain_link_out_buf);
     #endif
 
+    if (device_param->is_cuda == true)
+    {
+      if (device_param->cuda_d_pws_buf)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_buf);
+      if (device_param->cuda_d_pws_amp_buf)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_amp_buf);
+      if (device_param->cuda_d_pws_comp_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_comp_buf);
+      if (device_param->cuda_d_pws_idx)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_idx);
+      if (device_param->cuda_d_rules)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules);
+      if (device_param->cuda_d_rules_c)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c);
+      if (device_param->cuda_d_combs)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs);
+      if (device_param->cuda_d_combs_c)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs_c);
+      if (device_param->cuda_d_bfs)            hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs);
+      if (device_param->cuda_d_bfs_c)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c);
+      if (device_param->cuda_d_bitmap_s1_a)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_a);
+      if (device_param->cuda_d_bitmap_s1_b)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_b);
+      if (device_param->cuda_d_bitmap_s1_c)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_c);
+      if (device_param->cuda_d_bitmap_s1_d)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_d);
+      if (device_param->cuda_d_bitmap_s2_a)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_a);
+      if (device_param->cuda_d_bitmap_s2_b)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_b);
+      if (device_param->cuda_d_bitmap_s2_c)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_c);
+      if (device_param->cuda_d_bitmap_s2_d)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_d);
+      if (device_param->cuda_d_plain_bufs)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_plain_bufs);
+      if (device_param->cuda_d_digests_buf)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_digests_buf);
+      if (device_param->cuda_d_digests_shown)  hc_cuMemFree (hashcat_ctx, device_param->cuda_d_digests_shown);
+      if (device_param->cuda_d_salt_bufs)      hc_cuMemFree (hashcat_ctx, device_param->cuda_d_salt_bufs);
+      if (device_param->cuda_d_esalt_bufs)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_esalt_bufs);
+      if (device_param->cuda_d_tmps)           hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tmps);
+      if (device_param->cuda_d_hooks)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_hooks);
+      if (device_param->cuda_d_result)         hc_cuMemFree (hashcat_ctx, device_param->cuda_d_result);
+      if (device_param->cuda_d_extra0_buf)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra0_buf);
+      if (device_param->cuda_d_extra1_buf)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra1_buf);
+      if (device_param->cuda_d_extra2_buf)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra2_buf);
+      if (device_param->cuda_d_extra3_buf)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra3_buf);
+      if (device_param->cuda_d_root_css_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_root_css_buf);
+      if (device_param->cuda_d_markov_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_markov_css_buf);
+      if (device_param->cuda_d_tm_c)           hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c);
+      if (device_param->cuda_d_st_digests_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_digests_buf);
+      if (device_param->cuda_d_st_salts_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_salts_buf);
+      if (device_param->cuda_d_st_esalts_buf)  hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_esalts_buf);
+
+      if (device_param->cuda_event1)           hc_cuEventDestroy (hashcat_ctx, device_param->cuda_event1);
+      if (device_param->cuda_event2)           hc_cuEventDestroy (hashcat_ctx, device_param->cuda_event2);
+
+      if (device_param->cuda_stream)           hc_cuStreamDestroy (hashcat_ctx, device_param->cuda_stream);
+
+      if (device_param->cuda_module)           hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module);
+      if (device_param->cuda_module_mp)        hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_mp);
+      if (device_param->cuda_module_amp)       hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp);
+
+      if (device_param->cuda_context)          hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
+
+      device_param->cuda_d_pws_buf            = 0;
+      device_param->cuda_d_pws_amp_buf        = 0;
+      device_param->cuda_d_pws_comp_buf       = 0;
+      device_param->cuda_d_pws_idx            = 0;
+      device_param->cuda_d_rules              = 0;
+      device_param->cuda_d_rules_c            = 0;
+      device_param->cuda_d_combs              = 0;
+      device_param->cuda_d_combs_c            = 0;
+      device_param->cuda_d_bfs                = 0;
+      device_param->cuda_d_bfs_c              = 0;
+      device_param->cuda_d_bitmap_s1_a        = 0;
+      device_param->cuda_d_bitmap_s1_b        = 0;
+      device_param->cuda_d_bitmap_s1_c        = 0;
+      device_param->cuda_d_bitmap_s1_d        = 0;
+      device_param->cuda_d_bitmap_s2_a        = 0;
+      device_param->cuda_d_bitmap_s2_b        = 0;
+      device_param->cuda_d_bitmap_s2_c        = 0;
+      device_param->cuda_d_bitmap_s2_d        = 0;
+      device_param->cuda_d_plain_bufs         = 0;
+      device_param->cuda_d_digests_buf        = 0;
+      device_param->cuda_d_digests_shown      = 0;
+      device_param->cuda_d_salt_bufs          = 0;
+      device_param->cuda_d_esalt_bufs         = 0;
+      device_param->cuda_d_tmps               = 0;
+      device_param->cuda_d_hooks              = 0;
+      device_param->cuda_d_result             = 0;
+      device_param->cuda_d_extra0_buf         = 0;
+      device_param->cuda_d_extra1_buf         = 0;
+      device_param->cuda_d_extra2_buf         = 0;
+      device_param->cuda_d_extra3_buf         = 0;
+      device_param->cuda_d_root_css_buf       = 0;
+      device_param->cuda_d_markov_css_buf     = 0;
+      device_param->cuda_d_tm_c               = 0;
+      device_param->cuda_d_st_digests_buf     = 0;
+      device_param->cuda_d_st_salts_buf       = 0;
+      device_param->cuda_d_st_esalts_buf      = 0;
+
+      device_param->cuda_function1            = NULL;
+      device_param->cuda_function12           = NULL;
+      device_param->cuda_function2            = NULL;
+      device_param->cuda_function23           = NULL;
+      device_param->cuda_function3            = NULL;
+      device_param->cuda_function4            = NULL;
+      device_param->cuda_function_init2       = NULL;
+      device_param->cuda_function_loop2       = NULL;
+      device_param->cuda_function_mp          = NULL;
+      device_param->cuda_function_mp_l        = NULL;
+      device_param->cuda_function_mp_r        = NULL;
+      device_param->cuda_function_tm          = NULL;
+      device_param->cuda_function_amp         = NULL;
+      device_param->cuda_function_memset      = NULL;
+      device_param->cuda_function_atinit      = NULL;
+      device_param->cuda_function_decompress  = NULL;
+      device_param->cuda_function_aux1        = NULL;
+      device_param->cuda_function_aux2        = NULL;
+      device_param->cuda_function_aux3        = NULL;
+      device_param->cuda_function_aux4        = NULL;
+
+      device_param->cuda_module               = NULL;
+      device_param->cuda_module_mp            = NULL;
+      device_param->cuda_module_amp           = NULL;
+
+      device_param->cuda_context              = NULL;
+    }
+
     if (device_param->is_opencl == true)
     {
       if (device_param->opencl_d_pws_buf)        hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf);
@@ -10266,6 +10353,68 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->opencl_command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
 
       if (device_param->opencl_context)          hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
+
+      device_param->opencl_d_pws_buf           = NULL;
+      device_param->opencl_d_pws_amp_buf       = NULL;
+      device_param->opencl_d_pws_comp_buf      = NULL;
+      device_param->opencl_d_pws_idx           = NULL;
+      device_param->opencl_d_rules             = NULL;
+      device_param->opencl_d_rules_c           = NULL;
+      device_param->opencl_d_combs             = NULL;
+      device_param->opencl_d_combs_c           = NULL;
+      device_param->opencl_d_bfs               = NULL;
+      device_param->opencl_d_bfs_c             = NULL;
+      device_param->opencl_d_bitmap_s1_a       = NULL;
+      device_param->opencl_d_bitmap_s1_b       = NULL;
+      device_param->opencl_d_bitmap_s1_c       = NULL;
+      device_param->opencl_d_bitmap_s1_d       = NULL;
+      device_param->opencl_d_bitmap_s2_a       = NULL;
+      device_param->opencl_d_bitmap_s2_b       = NULL;
+      device_param->opencl_d_bitmap_s2_c       = NULL;
+      device_param->opencl_d_bitmap_s2_d       = NULL;
+      device_param->opencl_d_plain_bufs        = NULL;
+      device_param->opencl_d_digests_buf       = NULL;
+      device_param->opencl_d_digests_shown     = NULL;
+      device_param->opencl_d_salt_bufs         = NULL;
+      device_param->opencl_d_esalt_bufs        = NULL;
+      device_param->opencl_d_tmps              = NULL;
+      device_param->opencl_d_hooks             = NULL;
+      device_param->opencl_d_result            = NULL;
+      device_param->opencl_d_extra0_buf        = NULL;
+      device_param->opencl_d_extra1_buf        = NULL;
+      device_param->opencl_d_extra2_buf        = NULL;
+      device_param->opencl_d_extra3_buf        = NULL;
+      device_param->opencl_d_root_css_buf      = NULL;
+      device_param->opencl_d_markov_css_buf    = NULL;
+      device_param->opencl_d_tm_c              = NULL;
+      device_param->opencl_d_st_digests_buf    = NULL;
+      device_param->opencl_d_st_salts_buf      = NULL;
+      device_param->opencl_d_st_esalts_buf     = NULL;
+      device_param->opencl_kernel1             = NULL;
+      device_param->opencl_kernel12            = NULL;
+      device_param->opencl_kernel2             = NULL;
+      device_param->opencl_kernel23            = NULL;
+      device_param->opencl_kernel3             = NULL;
+      device_param->opencl_kernel4             = NULL;
+      device_param->opencl_kernel_init2        = NULL;
+      device_param->opencl_kernel_loop2        = NULL;
+      device_param->opencl_kernel_mp           = NULL;
+      device_param->opencl_kernel_mp_l         = NULL;
+      device_param->opencl_kernel_mp_r         = NULL;
+      device_param->opencl_kernel_tm           = NULL;
+      device_param->opencl_kernel_amp          = NULL;
+      device_param->opencl_kernel_memset       = NULL;
+      device_param->opencl_kernel_atinit       = NULL;
+      device_param->opencl_kernel_decompress   = NULL;
+      device_param->opencl_kernel_aux1         = NULL;
+      device_param->opencl_kernel_aux2         = NULL;
+      device_param->opencl_kernel_aux3         = NULL;
+      device_param->opencl_kernel_aux4         = NULL;
+      device_param->opencl_program             = NULL;
+      device_param->opencl_program_mp          = NULL;
+      device_param->opencl_program_amp         = NULL;
+      device_param->opencl_command_queue       = NULL;
+      device_param->opencl_context             = NULL;
     }
 
     device_param->pws_comp            = NULL;
@@ -10279,68 +10428,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
     device_param->brain_link_in_buf   = NULL;
     device_param->brain_link_out_buf  = NULL;
     #endif
-
-    device_param->opencl_d_pws_buf           = NULL;
-    device_param->opencl_d_pws_amp_buf       = NULL;
-    device_param->opencl_d_pws_comp_buf      = NULL;
-    device_param->opencl_d_pws_idx           = NULL;
-    device_param->opencl_d_rules             = NULL;
-    device_param->opencl_d_rules_c           = NULL;
-    device_param->opencl_d_combs             = NULL;
-    device_param->opencl_d_combs_c           = NULL;
-    device_param->opencl_d_bfs               = NULL;
-    device_param->opencl_d_bfs_c             = NULL;
-    device_param->opencl_d_bitmap_s1_a       = NULL;
-    device_param->opencl_d_bitmap_s1_b       = NULL;
-    device_param->opencl_d_bitmap_s1_c       = NULL;
-    device_param->opencl_d_bitmap_s1_d       = NULL;
-    device_param->opencl_d_bitmap_s2_a       = NULL;
-    device_param->opencl_d_bitmap_s2_b       = NULL;
-    device_param->opencl_d_bitmap_s2_c       = NULL;
-    device_param->opencl_d_bitmap_s2_d       = NULL;
-    device_param->opencl_d_plain_bufs        = NULL;
-    device_param->opencl_d_digests_buf       = NULL;
-    device_param->opencl_d_digests_shown     = NULL;
-    device_param->opencl_d_salt_bufs         = NULL;
-    device_param->opencl_d_esalt_bufs        = NULL;
-    device_param->opencl_d_tmps              = NULL;
-    device_param->opencl_d_hooks             = NULL;
-    device_param->opencl_d_result            = NULL;
-    device_param->opencl_d_extra0_buf        = NULL;
-    device_param->opencl_d_extra1_buf        = NULL;
-    device_param->opencl_d_extra2_buf        = NULL;
-    device_param->opencl_d_extra3_buf        = NULL;
-    device_param->opencl_d_root_css_buf      = NULL;
-    device_param->opencl_d_markov_css_buf    = NULL;
-    device_param->opencl_d_tm_c              = NULL;
-    device_param->opencl_d_st_digests_buf    = NULL;
-    device_param->opencl_d_st_salts_buf      = NULL;
-    device_param->opencl_d_st_esalts_buf     = NULL;
-    device_param->opencl_kernel1             = NULL;
-    device_param->opencl_kernel12            = NULL;
-    device_param->opencl_kernel2             = NULL;
-    device_param->opencl_kernel23            = NULL;
-    device_param->opencl_kernel3             = NULL;
-    device_param->opencl_kernel4             = NULL;
-    device_param->opencl_kernel_init2        = NULL;
-    device_param->opencl_kernel_loop2        = NULL;
-    device_param->opencl_kernel_mp           = NULL;
-    device_param->opencl_kernel_mp_l         = NULL;
-    device_param->opencl_kernel_mp_r         = NULL;
-    device_param->opencl_kernel_tm           = NULL;
-    device_param->opencl_kernel_amp          = NULL;
-    device_param->opencl_kernel_memset       = NULL;
-    device_param->opencl_kernel_atinit       = NULL;
-    device_param->opencl_kernel_decompress   = NULL;
-    device_param->opencl_kernel_aux1         = NULL;
-    device_param->opencl_kernel_aux2         = NULL;
-    device_param->opencl_kernel_aux3         = NULL;
-    device_param->opencl_kernel_aux4         = NULL;
-    device_param->opencl_program             = NULL;
-    device_param->opencl_program_mp          = NULL;
-    device_param->opencl_program_amp         = NULL;
-    device_param->opencl_command_queue       = NULL;
-    device_param->opencl_context             = NULL;
   }
 }
 
diff --git a/src/selftest.c b/src/selftest.c
index 81d9b415e..144b71611 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -21,17 +21,26 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   user_options_t       *user_options       = hashcat_ctx->user_options;
   user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
 
-  cl_int CL_err;
-
+  int CU_rc;
   int CL_rc;
 
   if (hashconfig->st_hash == NULL) return 0;
 
   // init : replace hashes with selftest hash
 
-  device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf;
-  device_param->kernel_params[17] = &device_param->opencl_d_st_salts_buf;
-  device_param->kernel_params[18] = &device_param->opencl_d_st_esalts_buf;
+  if (device_param->is_cuda == true)
+  {
+    device_param->kernel_params[15] = &device_param->cuda_d_st_digests_buf;
+    device_param->kernel_params[17] = &device_param->cuda_d_st_salts_buf;
+    device_param->kernel_params[18] = &device_param->cuda_d_st_esalts_buf;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf;
+    device_param->kernel_params[17] = &device_param->opencl_d_st_salts_buf;
+    device_param->kernel_params[18] = &device_param->opencl_d_st_esalts_buf;
+  }
 
   device_param->kernel_params_buf32[31] = 1;
   device_param->kernel_params_buf32[32] = 0;
@@ -57,9 +66,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     pw.pw_len = (u32) pw_len;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+    if (device_param->is_cuda == true)
+    {
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-    if (CL_err != CL_SUCCESS) return -1;
+      if (CU_rc == -1) return -1;
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+      if (CL_rc == -1) return -1;
+    }
   }
   else
   {
@@ -84,9 +103,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           uppercase ((u8 *) pw_ptr, pw.pw_len);
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-        if (CL_err != CL_SUCCESS) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
       {
@@ -136,13 +165,27 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
           comb_ptr[comb.pw_len] = 0x80;
         }
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, &comb, 1 * sizeof (pw_t));
 
-        if (CL_err != CL_SUCCESS) return -1;
+          if (CU_rc == -1) return -1;
 
-        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-        if (CL_err != CL_SUCCESS) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
       {
@@ -165,9 +208,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
           pw.pw_len = (u32) pw_len;
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          if (device_param->is_cuda == true)
+          {
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-          if (CL_err != CL_SUCCESS) return -1;
+            if (CU_rc == -1) return -1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
         else
         {
@@ -208,9 +261,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             bf.i = byte_swap_32 (bf.i);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
+          if (device_param->is_cuda == true)
+          {
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t));
 
-          if (CL_err != CL_SUCCESS) return -1;
+            if (CU_rc == -1) return -1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
 
           pw_t pw; memset (&pw, 0, sizeof (pw));
 
@@ -296,9 +359,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]);
           }
 
-          CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+          if (device_param->is_cuda == true)
+          {
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-          if (CL_err != CL_SUCCESS) return -1;
+            if (CU_rc == -1) return -1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
 
           highest_pw_len = pw.pw_len;
         }
@@ -316,9 +389,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       pw.pw_len = (u32) pw_len;
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t));
 
-      if (CL_err != CL_SUCCESS) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
     }
   }
 
@@ -372,15 +455,35 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
 
       module_ctx->module_hook12 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
     }
 
     const u32 salt_pos = 0;
@@ -411,15 +514,35 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
       if (CL_rc == -1) return -1;
 
-      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
 
       module_ctx->module_hook23 (device_param, hashes->st_hook_salts_buf, 0, 1);
 
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
     }
 
     if (hashconfig->opts_type & OPTS_TYPE_INIT2)
@@ -492,9 +615,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   u32 num_cracked;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+  if (device_param->is_cuda == true)
+  {
+    CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32));
 
-  if (CL_err != CL_SUCCESS) return -1;
+    if (CU_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+  }
 
   // finish : cleanup and restore
 
@@ -507,42 +640,99 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   device_param->kernel_params_buf32[33] = 0;
   device_param->kernel_params_buf64[34] = 0;
 
-  device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
-  device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
-  device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
+  if (device_param->is_cuda == true)
+  {
+    device_param->kernel_params[15] = &device_param->cuda_d_digests_buf;
+    device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs;
+    device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs;
 
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
-  CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_pws_buf,         device_param->size_pws);      if (CU_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_tmps,            device_param->size_tmps);     if (CU_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_hooks,           device_param->size_hooks);    if (CU_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs,      device_param->size_plains);   if (CU_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_digests_shown,   device_param->size_shown);    if (CU_rc == -1) return -1;
+    CU_rc = run_cuda_kernel_bzero   (hashcat_ctx, device_param, device_param->cuda_d_result,          device_param->size_results);  if (CU_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    device_param->kernel_params[15] = &device_param->opencl_d_digests_buf;
+    device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs;
+    device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs;
+
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
+    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result,        device_param->size_results);  if (CL_rc == -1) return -1;
+  }
 
   if (user_options->slow_candidates == true)
   {
-    CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
+    if (device_param->is_cuda == true)
+    {
+      CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c);
 
-    if (CL_rc == -1) return -1;
-  }
-  else
-  {
-    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+      if (CU_rc == -1) return -1;
+    }
+
+    if (device_param->is_opencl == true)
     {
       CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
 
       if (CL_rc == -1) return -1;
     }
+  }
+  else
+  {
+    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+    {
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c);
+
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c);
+
+        if (CL_rc == -1) return -1;
+      }
+    }
     else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
     {
-      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, device_param->size_combs);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs);
+
+        if (CL_rc == -1) return -1;
+      }
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
     {
-      CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, device_param->size_bfs);
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs);
+
+        if (CL_rc == -1) return -1;
+      }
     }
   }
 
@@ -586,6 +776,13 @@ HC_API_CALL void *thread_selftest (void *p)
 
   if (device_param->skipped_warning == true) return NULL;
 
+  if (device_param->is_cuda == true)
+  {
+    const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
+
+    if (rc_cuCtxSetCurrent == -1) return NULL;
+  }
+
   const int rc_selftest = selftest (hashcat_ctx, device_param);
 
   if (user_options->benchmark == true)

From 08dc1acc02ff32df0645686bdec47d6a33f8251f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 5 May 2019 11:57:54 +0200
Subject: [PATCH 28/73] More CUDA rewrites

---
 src/autotune.c |  18 +-
 src/backend.c  | 668 +++++++++++++++++++++++++++++++++++--------------
 src/dispatch.c |  14 ++
 src/hashes.c   |  73 ++++--
 4 files changed, 558 insertions(+), 215 deletions(-)

diff --git a/src/autotune.c b/src/autotune.c
index d0b99f59a..90f067d8b 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -236,8 +236,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
 
-printf ("%f\n", exec_msec_pre_final);
-
   const u32 exec_left = (const u32) (target_msec / exec_msec_pre_final);
 
   const u32 accel_left = kernel_accel_max / kernel_accel;
@@ -255,23 +253,23 @@ printf ("%f\n", exec_msec_pre_final);
   {
     // reset them fake words
 
-    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 0, device_param->size_pws);
+    CU_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 0, device_param->size_pws);
 
-    if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
 
     // reset other buffers in case autotune cracked something
 
-    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, 0, device_param->size_plains);
+    CU_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, 0, device_param->size_plains);
 
-    if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
 
-    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, 0, device_param->size_shown);
+    CU_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, 0, device_param->size_shown);
 
-    if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
 
-    CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_result, 0, device_param->size_results);
+    CU_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_result, 0, device_param->size_results);
 
-    if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
   }
 
   if (device_param->is_opencl == true)
diff --git a/src/backend.c b/src/backend.c
index 7759c41fa..151ef37d9 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2438,21 +2438,46 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 {
   pw_idx_t pw_idx;
 
-  int CL_rc;
+  pw_idx.off = 0;
+  pw_idx.cnt = 0;
+  pw_idx.len = 0;
 
-  CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
+  if (device_param->is_cuda == true)
+  {
+    const int CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t));
 
-  if (CL_rc == -1) return -1;
+    if (CU_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    const int CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+  }
 
   const u32 off = pw_idx.off;
   const u32 cnt = pw_idx.cnt;
   const u32 len = pw_idx.len;
 
-  if (cnt > 0)
+  if (device_param->is_cuda == true)
   {
-    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
+    if (cnt > 0)
+    {
+      const int CU_rc = hc_cuMemcpyDtoH (hashcat_ctx,pw->i, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32));
 
-    if (CL_rc == -1) return -1;
+      if (CU_rc == -1) return -1;
+    }
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    if (cnt > 0)
+    {
+      const int CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), pw->i, 0, NULL, NULL);
+
+      if (CL_rc == -1) return -1;
+    }
   }
 
   for (u32 i = cnt; i < 64; i++)
@@ -2478,6 +2503,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     return process_stdout (hashcat_ctx, device_param, pws_cnt);
   }
 
+  int CU_rc;
   int CL_rc;
 
   if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -2493,17 +2519,37 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         {
           const u32 size_tm = 32 * sizeof (bs_word_t);
 
-          CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm);
+          if (device_param->is_cuda == true)
+          {
+            CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm);
 
-          if (CL_rc == -1) return -1;
+            if (CU_rc == -1) return -1;
+          }
 
-          CL_rc = run_kernel_tm (hashcat_ctx, device_param);
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm);
 
-          if (CL_rc == -1) return -1;
+            if (CL_rc == -1) return -1;
+          }
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
+          const int rc_kernel_tm = run_kernel_tm (hashcat_ctx, device_param);
 
-          if (CL_rc == -1) return -1;
+          if (rc_kernel_tm == -1) return -1;
+
+          if (device_param->is_cuda == true)
+          {
+            const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm);
+
+            if (CU_rc == -1) return -1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
       }
     }
@@ -2544,6 +2590,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     if (run_init == true)
     {
+//tbd
       CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
 
       if (CL_rc == -1) return -1;
@@ -2568,12 +2615,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
+//tbd
         CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook12 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
+//tbd
         CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
@@ -2640,12 +2689,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (CL_rc == -1) return -1;
 
+//tbd
         CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
 
         module_ctx->module_hook23 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
+//tbd
         CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
 
         if (CL_rc == -1) return -1;
@@ -3311,7 +3362,15 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
 int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num)
 {
-puts ("run_kernel_mp");
+  u64 kernel_threads = 0;
+
+  switch (kern_run)
+  {
+    case KERN_RUN_MP:   kernel_threads  = device_param->kernel_wgs_mp;    break;
+    case KERN_RUN_MP_R: kernel_threads  = device_param->kernel_wgs_mp_r;  break;
+    case KERN_RUN_MP_L: kernel_threads  = device_param->kernel_wgs_mp_l;  break;
+  }
+
   u64 num_elements = num;
 
   switch (kern_run)
@@ -3321,76 +3380,97 @@ puts ("run_kernel_mp");
     case KERN_RUN_MP_L: device_param->kernel_params_mp_l_buf64[9] = num; break;
   }
 
-  u64       kernel_threads = 0;
-  cl_kernel kernel = NULL;
-
-  switch (kern_run)
+  if (device_param->is_cuda == true)
   {
-    case KERN_RUN_MP:
-      kernel          = device_param->opencl_kernel_mp;
-      kernel_threads  = device_param->kernel_wgs_mp;
-      break;
-    case KERN_RUN_MP_R:
-      kernel          = device_param->opencl_kernel_mp_r;
-      kernel_threads  = device_param->kernel_wgs_mp_r;
-      break;
-    case KERN_RUN_MP_L:
-      kernel          = device_param->opencl_kernel_mp_l;
-      kernel_threads  = device_param->kernel_wgs_mp_l;
-      break;
-    default:
-      event_log_error (hashcat_ctx, "Invalid kernel specified.");
-      return -1;
+    CUfunction cuda_function = NULL;
+
+    void **cuda_args = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP:   cuda_function = device_param->cuda_function_mp;
+                          cuda_args     = device_param->kernel_params_mp;
+                          break;
+      case KERN_RUN_MP_R: cuda_function = device_param->cuda_function_mp_r;
+                          cuda_args     = device_param->kernel_params_mp_r;
+                          break;
+      case KERN_RUN_MP_L: cuda_function = device_param->cuda_function_mp_l;
+                          cuda_args     = device_param->kernel_params_mp_l;
+                          break;
+    }
+
+    num_elements = CEILDIV (num_elements, kernel_threads);
+
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, cuda_args, NULL);
+
+    if (rc_cuLaunchKernel == -1) return -1;
+
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+
+    if (rc_cuCtxSynchronize == -1) return -1;
   }
 
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-
-  int CL_rc;
-
-  switch (kern_run)
+  if (device_param->is_opencl == true)
   {
-    case KERN_RUN_MP:   CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]);   if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp[4]);   if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp[5]);   if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp[6]);   if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp[7]);   if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]);   if (CL_rc == -1) return -1;
-                        break;
-    case KERN_RUN_MP_R: CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_r[4]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_r[5]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_r[6]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_r[7]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]); if (CL_rc == -1) return -1;
-                        break;
-    case KERN_RUN_MP_L: CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_l[4]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_l[5]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_l[6]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_l[7]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 8, sizeof (cl_uint),  device_param->kernel_params_mp_l[8]); if (CL_rc == -1) return -1;
-                        CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 9, sizeof (cl_ulong), device_param->kernel_params_mp_l[9]); if (CL_rc == -1) return -1;
-                        break;
+    int CL_rc;
+
+    cl_kernel opencl_kernel = NULL;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP:   opencl_kernel = device_param->opencl_kernel_mp;   break;
+      case KERN_RUN_MP_R: opencl_kernel = device_param->opencl_kernel_mp_r; break;
+      case KERN_RUN_MP_L: opencl_kernel = device_param->opencl_kernel_mp_l; break;
+    }
+
+    switch (kern_run)
+    {
+      case KERN_RUN_MP:   CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]);   if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp[4]);   if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp[5]);   if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp[6]);   if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp[7]);   if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp[8]);   if (CL_rc == -1) return -1;
+                          break;
+      case KERN_RUN_MP_R: CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_r[4]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_r[5]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_r[6]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_r[7]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_ulong), device_param->kernel_params_mp_r[8]); if (CL_rc == -1) return -1;
+                          break;
+      case KERN_RUN_MP_L: CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_l[4]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_l[5]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_l[6]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_l[7]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 8, sizeof (cl_uint),  device_param->kernel_params_mp_l[8]); if (CL_rc == -1) return -1;
+                          CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 9, sizeof (cl_ulong), device_param->kernel_params_mp_l[9]); if (CL_rc == -1) return -1;
+                          break;
+    }
+
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+
+    const size_t global_work_size[3] = { num_elements,   1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
   }
 
-  const size_t global_work_size[3] = { num_elements,   1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
-
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
-
-  if (CL_rc == -1) return -1;
-
   return 0;
 }
 
+//tbd
 int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
 puts ("run_kernel_tm");
@@ -3420,6 +3500,7 @@ puts ("run_kernel_tm");
   return 0;
 }
 
+//tbd
 int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
 {
 puts ("run_kernel_amp");
@@ -3459,44 +3540,60 @@ puts ("run_kernel_amp");
 
 int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
 {
-puts ("run_kernel_decompress");
-  u64 num_elements = num;
+  device_param->kernel_params_decompress_buf64[3] = num; // element count exactly as passed in, before any per-backend rescaling below
 
-  device_param->kernel_params_decompress_buf64[3] = num_elements;
+  u64 num_elements = num; // working copy; rescaled differently for each backend
 
   const u64 kernel_threads = device_param->kernel_wgs_decompress;
 
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+  if (device_param->is_cuda == true) // CUDA device: launch via the hc_cu* wrappers
+  {
+    num_elements = CEILDIV (num_elements, kernel_threads); // NOTE(review): CEILDIV presumably rounds up, giving a block count - confirm
 
-  cl_kernel kernel = device_param->opencl_kernel_decompress;
+    CUfunction cuda_function = device_param->cuda_function_decompress;
 
-  const size_t global_work_size[3] = { num_elements,    1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_decompress, NULL); // kernel arguments are taken from kernel_params_decompress
 
-  int CL_rc;
+    if (rc_cuLaunchKernel == -1) return -1;
 
-  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]);
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx); // NOTE(review): presumably blocks until the launch above has completed - confirm
 
-  if (CL_rc == -1) return -1;
+    if (rc_cuCtxSynchronize == -1) return -1;
+  }
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  if (device_param->is_opencl == true) // OpenCL device: set argument, enqueue, flush, finish
+  {
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads); // NOTE(review): presumably pads the count to a multiple of kernel_threads - confirm
 
-  if (CL_rc == -1) return -1;
+    cl_kernel opencl_kernel = device_param->opencl_kernel_decompress;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+    const size_t global_work_size[3] = { num_elements,    1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
 
-  if (CL_rc == -1) return -1;
+    int CL_rc;
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); // only argument index 3 is set in this function
 
-  if (CL_rc == -1) return -1;
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); // work dimension 1, no wait list, no event
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+  }
 
   return 0;
 }
 
 int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt)
 {
-puts ("run_copy");
   combinator_ctx_t     *combinator_ctx      = hashcat_ctx->combinator_ctx;
   hashconfig_t         *hashconfig          = hashcat_ctx->hashconfig;
   user_options_t       *user_options        = hashcat_ctx->user_options;
@@ -3518,30 +3615,27 @@ puts ("run_copy");
 
   if (user_options->slow_candidates == true)
   {
-    int CL_rc;
-
-    CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
-
-    if (CL_rc == -1) return -1;
-
-    const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
-
-    const u32 off = pw_idx->off;
-
-    if (off)
+    if (device_param->is_cuda == true)
     {
-      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+      int CU_rc;
 
-      if (CL_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t));
+
+      if (CU_rc == -1) return -1;
+
+      const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+      const u32 off = pw_idx->off;
+
+      if (off)
+      {
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32));
+
+        if (CU_rc == -1) return -1;
+      }
     }
 
-    CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
-
-    if (CL_rc == -1) return -1;
-  }
-  else
-  {
-    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+    if (device_param->is_opencl == true)
     {
       int CL_rc;
 
@@ -3559,10 +3653,59 @@ puts ("run_copy");
 
         if (CL_rc == -1) return -1;
       }
+    }
 
-      CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+    const int rc_kernel_decompress = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
 
-      if (CL_rc == -1) return -1;
+    if (rc_kernel_decompress == -1) return -1;
+  }
+  else
+  {
+    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
+    {
+      if (device_param->is_cuda == true)
+      {
+        int CU_rc;
+
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t));
+
+        if (CU_rc == -1) return -1;
+
+        const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+        const u32 off = pw_idx->off;
+
+        if (off)
+        {
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32));
+
+          if (CU_rc == -1) return -1;
+        }
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        int CL_rc;
+
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+
+        const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+        const u32 off = pw_idx->off;
+
+        if (off)
+        {
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
+      }
+
+      const int rc_kernel_decompress = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+
+      if (rc_kernel_decompress == -1) return -1;
     }
     else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
     {
@@ -3602,74 +3745,143 @@ puts ("run_copy");
           }
         }
 
-        int CL_rc;
-
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
-
-        if (CL_rc == -1) return -1;
-
-        const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
-
-        const u32 off = pw_idx->off;
-
-        if (off)
+        if (device_param->is_cuda == true)
         {
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+          int CU_rc;
 
-          if (CL_rc == -1) return -1;
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t));
+
+          if (CU_rc == -1) return -1;
+
+          const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+          const u32 off = pw_idx->off;
+
+          if (off)
+          {
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32));
+
+            if (CU_rc == -1) return -1;
+          }
         }
 
-        CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+        if (device_param->is_opencl == true)
+        {
+          int CL_rc;
 
-        if (CL_rc == -1) return -1;
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+
+          const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+          const u32 off = pw_idx->off;
+
+          if (off)
+          {
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
+        }
+
+        const int rc_kernel_decompress = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+
+        if (rc_kernel_decompress == -1) return -1;
       }
       else
       {
         if (user_options->attack_mode == ATTACK_MODE_COMBI)
         {
-          int CL_rc;
-
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
-
-          const u32 off = pw_idx->off;
-
-          if (off)
+          if (device_param->is_cuda == true)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            int CU_rc;
 
-            if (CL_rc == -1) return -1;
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t));
+
+            if (CU_rc == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32));
+
+              if (CU_rc == -1) return -1;
+            }
           }
 
-          CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+          if (device_param->is_opencl == true)
+          {
+            int CL_rc;
 
-          if (CL_rc == -1) return -1;
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+
+              if (CL_rc == -1) return -1;
+            }
+          }
+
+          const int rc_kernel_decompress = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+
+          if (rc_kernel_decompress == -1) return -1;
         }
         else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
         {
-          int CL_rc;
-
-          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
-
-          if (CL_rc == -1) return -1;
-
-          const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
-
-          const u32 off = pw_idx->off;
-
-          if (off)
+          if (device_param->is_cuda == true)
           {
-            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+            int CU_rc;
 
-            if (CL_rc == -1) return -1;
+            CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t));
+
+            if (CU_rc == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32));
+
+              if (CU_rc == -1) return -1;
+            }
           }
 
-          CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+          if (device_param->is_opencl == true)
+          {
+            int CL_rc;
 
-          if (CL_rc == -1) return -1;
+            CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+
+            const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
+
+            const u32 off = pw_idx->off;
+
+            if (off)
+            {
+              CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
+
+              if (CL_rc == -1) return -1;
+            }
+          }
+
+          const int rc_kernel_decompress = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
+
+          if (rc_kernel_decompress == -1) return -1;
         }
         else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
         {
@@ -3700,7 +3912,6 @@ puts ("run_copy");
 
 int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt)
 {
-puts ("run_cracker");
   combinator_ctx_t      *combinator_ctx     = hashcat_ctx->combinator_ctx;
   hashconfig_t          *hashconfig         = hashcat_ctx->hashconfig;
   hashes_t              *hashes             = hashcat_ctx->hashes;
@@ -3862,9 +4073,19 @@ puts ("run_cracker");
       {
         if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
-          const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
+          if (device_param->is_cuda == true)
+          {
+            const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t));
 
-          if (CL_rc == -1) return -1;
+            if (CU_rc == -1) return -1;
+          }
+
+          if (device_param->is_opencl == true)
+          {
+            const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
@@ -3968,9 +4189,19 @@ puts ("run_cracker");
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              if (device_param->is_cuda == true)
+              {
+                const int CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t));
 
-              if (CL_rc == -1) return -1;
+                if (CU_rc == -1) return -1;
+              }
+
+              if (device_param->is_opencl == true)
+              {
+                const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+
+                if (CL_rc == -1) return -1;
+              }
             }
             else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
             {
@@ -3984,9 +4215,19 @@ puts ("run_cracker");
 
               if (CL_rc == -1) return -1;
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+              if (device_param->is_cuda == true)
+              {
+                const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t));
 
-              if (CL_rc == -1) return -1;
+                if (CU_rc == -1) return -1;
+              }
+
+              if (device_param->is_opencl == true)
+              {
+                CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+
+                if (CL_rc == -1) return -1;
+              }
             }
             else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
             {
@@ -3994,15 +4235,23 @@ puts ("run_cracker");
 
               device_param->kernel_params_mp_buf64[3] = off;
 
-              int CL_rc;
+              const int rc_kernel_mp = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP, innerloop_left);
 
-              CL_rc = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP, innerloop_left);
+              if (rc_kernel_mp == -1) return -1;
 
-              if (CL_rc == -1) return -1;
+              if (device_param->is_cuda == true)
+              {
+                const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t));
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+                if (CU_rc == -1) return -1;
+              }
 
-              if (CL_rc == -1) return -1;
+              if (device_param->is_opencl == true)
+              {
+                const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+
+                if (CL_rc == -1) return -1;
+              }
             }
           }
           else
@@ -4107,9 +4356,19 @@ puts ("run_cracker");
 
               innerloop_left = i;
 
-              const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+              if (device_param->is_cuda == true)
+              {
+                const int CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t));
 
-              if (CL_rc == -1) return -1;
+                if (CU_rc == -1) return -1;
+              }
+
+              if (device_param->is_opencl == true)
+              {
+                const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL);
+
+                if (CL_rc == -1) return -1;
+              }
             }
             else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
             {
@@ -4117,15 +4376,23 @@ puts ("run_cracker");
 
               device_param->kernel_params_mp_buf64[3] = off;
 
-              int CL_rc;
+              const int rc_kernel_mp = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP, innerloop_left);
 
-              CL_rc = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP, innerloop_left);
+              if (rc_kernel_mp == -1) return -1;
 
-              if (CL_rc == -1) return -1;
+              if (device_param->is_cuda == true)
+              {
+                const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t));
 
-              CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+                if (CU_rc == -1) return -1;
+              }
 
-              if (CL_rc == -1) return -1;
+              if (device_param->is_opencl == true)
+              {
+                const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL);
+
+                if (CL_rc == -1) return -1;
+              }
             }
           }
         }
@@ -4135,15 +4402,23 @@ puts ("run_cracker");
 
           device_param->kernel_params_mp_r_buf64[3] = off;
 
-          int CL_rc;
+          const int rc_kernel_mp = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP_R, innerloop_left);
 
-          CL_rc = run_kernel_mp (hashcat_ctx, device_param, KERN_RUN_MP_R, innerloop_left);
+          if (rc_kernel_mp == -1) return -1;
 
-          if (CL_rc == -1) return -1;
+          if (device_param->is_cuda == true)
+          {
+            const int CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t));
 
-          CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
+            if (CU_rc == -1) return -1;
+          }
 
-          if (CL_rc == -1) return -1;
+          if (device_param->is_opencl == true)
+          {
+            const int CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);
+
+            if (CL_rc == -1) return -1;
+          }
         }
       }
 
@@ -10562,9 +10837,20 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
     device_param->kernel_params_mp_buf64[3] = 0;
     device_param->kernel_params_mp_buf32[4] = mask_ctx->css_cnt;
 
+    if (device_param->is_cuda == true)
+    {
+      int CU_rc;
+
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
+
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css); if (CU_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
-      int CL_rc = CL_SUCCESS;
+      int CL_rc;
 
       for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
       for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint),  device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; }
@@ -10602,6 +10888,22 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
     device_param->kernel_params_mp_r_buf64[3] = 0;
     device_param->kernel_params_mp_r_buf32[4] = css_cnt_r;
 
+    if (device_param->is_cuda == true)
+    {
+      int CU_rc;
+
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_uint),  device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; }
+
+      //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_uint),  device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+      //for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; }
+
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css);   if (CU_rc == -1) return -1;
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css); if (CU_rc == -1) return -1;
+    }
+
     if (device_param->is_opencl == true)
     {
       int CL_rc = CL_SUCCESS;
diff --git a/src/dispatch.c b/src/dispatch.c
index 44cdb59ce..6f841124a 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -349,6 +349,13 @@ HC_API_CALL void *thread_calc_stdin (void *p)
 
   if (device_param->skipped_warning == true) return NULL;
 
+  if (device_param->is_cuda == true)
+  {
+    const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
+
+    if (rc_cuCtxSetCurrent == -1) return NULL;
+  }
+
   const int rc_calc = calc_stdin (hashcat_ctx, device_param);
 
   if (rc_calc == -1)
@@ -1668,6 +1675,13 @@ HC_API_CALL void *thread_calc (void *p)
 
   if (device_param->skipped_warning == true) return NULL;
 
+  if (device_param->is_cuda == true)
+  {
+    const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
+
+    if (rc_cuCtxSetCurrent == -1) return NULL;
+  }
+
   const int rc_calc = calc (hashcat_ctx, device_param);
 
   if (rc_calc == -1)
diff --git a/src/hashes.c b/src/hashes.c
index b43a66b26..72b9aad69 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -309,7 +309,15 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
   {
     tmps = hcmalloc (hashconfig->tmp_size);
 
-    hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
+    if (device_param->is_cuda == true)
+    {
+      hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
+    }
   }
 
   // hash
@@ -460,15 +468,21 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   u32 num_cracked;
 
-  cl_int CL_err;
+  int CU_rc;
+  int CL_rc;
 
-  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
-
-  if (CL_err != CL_SUCCESS)
+  if (device_param->is_cuda == true)
   {
-    event_log_error (hashcat_ctx, "clEnqueueReadBuffer(): %s", val2cstr_cl (CL_err));
+    CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32));
 
-    return -1;
+    if (CU_rc == -1) return -1;
+  }
+
+  if (device_param->is_opencl == true)
+  {
+    CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
   }
 
   if (user_options->speed_only == true)
@@ -483,13 +497,18 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     plain_t *cracked = (plain_t *) hccalloc (num_cracked, sizeof (plain_t));
 
-    CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
-
-    if (CL_err != CL_SUCCESS)
+    if (device_param->is_cuda == true)
     {
-      event_log_error (hashcat_ctx, "clEnqueueReadBuffer(): %s", val2cstr_cl (CL_err));
+      CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t));
 
-      return -1;
+      if (CU_rc == -1) return -1;
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
+
+      if (CL_rc == -1) return -1;
     }
 
     u32 cpt_cracked = 0;
@@ -553,25 +572,35 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
       memset (hashes->digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (u32));
 
-      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
-
-      if (CL_err != CL_SUCCESS)
+      if (device_param->is_cuda == true)
       {
-        event_log_error (hashcat_ctx, "clEnqueueWriteBuffer(): %s", val2cstr_cl (CL_err));
+        CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32));
 
-        return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
       }
     }
 
     num_cracked = 0;
 
-    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
-
-    if (CL_err != CL_SUCCESS)
+    if (device_param->is_cuda == true)
     {
-      event_log_error (hashcat_ctx, "clEnqueueWriteBuffer(): %s", val2cstr_cl (CL_err));
+      CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_result, &num_cracked, sizeof (u32));
 
-      return -1;
+      if (CU_rc == -1) return -1;
+    }
+
+    if (device_param->is_opencl == true)
+    {
+      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
+
+      if (CL_rc == -1) return -1;
     }
   }
 

From e9c04c24463254d5d2964d360b34479ac9cfc3e2 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 5 May 2019 21:15:46 +0200
Subject: [PATCH 29/73] More CUDA implementation

---
 src/backend.c | 179 +++++++++++++++++++++++++++++++++++---------------
 src/usage.c   |   2 +-
 2 files changed, 128 insertions(+), 53 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 151ef37d9..f76344f47 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2590,10 +2590,19 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     if (run_init == true)
     {
-//tbd
-      CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
+      if (device_param->is_cuda == true)
+      {
+        CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t));
 
-      if (CL_rc == -1) return -1;
+        if (CU_rc == -1) return -1;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
+
+        if (CL_rc == -1) return -1;
+      }
 
       if (user_options->slow_candidates == true)
       {
@@ -2605,27 +2614,45 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         if (CL_rc == -1) return -1;
       }
 
-      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, pws_cnt, false, 0);
+      const int rc_kernel = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, pws_cnt, false, 0);
 
-      if (CL_rc == -1) return -1;
+      if (rc_kernel == -1) return -1;
 
       if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
       {
-        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_12, pws_cnt, false, 0);
+        const int rc_kernel = run_kernel (hashcat_ctx, device_param, KERN_RUN_12, pws_cnt, false, 0);
 
-        if (CL_rc == -1) return -1;
+        if (rc_kernel == -1) return -1;
 
-//tbd
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks);
 
-        if (CL_rc == -1) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
 
         module_ctx->module_hook12 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-//tbd
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks);
 
-        if (CL_rc == -1) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
     }
 
@@ -2644,9 +2671,9 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
         device_param->kernel_params_buf32[28] = loop_pos;
         device_param->kernel_params_buf32[29] = loop_left;
 
-        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_cnt, true, slow_iteration);
+        const int rc_kernel = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_cnt, true, slow_iteration);
 
-        if (CL_rc == -1) return -1;
+        if (rc_kernel == -1) return -1;
 
         //bug?
         //while (status_ctx->run_thread_level2 == false) break;
@@ -2685,21 +2712,39 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
       if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
       {
-        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_cnt, false, 0);
+        const int rc_kernel = run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_cnt, false, 0);
 
-        if (CL_rc == -1) return -1;
+        if (rc_kernel == -1) return -1;
 
-//tbd
-        CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks);
 
-        if (CL_rc == -1) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
 
         module_ctx->module_hook23 (device_param, hashes->hook_salts_buf, salt_pos, pws_cnt);
 
-//tbd
-        CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+        if (device_param->is_cuda == true)
+        {
+          CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks);
 
-        if (CL_rc == -1) return -1;
+          if (CU_rc == -1) return -1;
+        }
+
+        if (device_param->is_opencl == true)
+        {
+          CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);
+
+          if (CL_rc == -1) return -1;
+        }
       }
     }
 
@@ -3470,70 +3515,100 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   return 0;
 }
 
-//tbd
 int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
-puts ("run_kernel_tm");
   const u64 num_elements = 1024; // fixed
 
   const u64 kernel_threads = MIN (num_elements, device_param->kernel_wgs_tm);
 
-  cl_kernel kernel = device_param->opencl_kernel_tm;
+  if (device_param->is_cuda == true)
+  {
+    CUfunction cuda_function = device_param->cuda_function_tm;
 
-  const size_t global_work_size[3] = { num_elements,    1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_tm, NULL);
 
-  int CL_rc;
+    if (rc_cuLaunchKernel == -1) return -1;
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
 
-  if (CL_rc == -1) return -1;
+    if (rc_cuCtxSynchronize == -1) return -1;
+  }
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+  if (device_param->is_opencl == true)
+  {
+    cl_kernel cuda_kernel = device_param->opencl_kernel_tm;
 
-  if (CL_rc == -1) return -1;
+    const size_t global_work_size[3] = { num_elements,    1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+    int CL_rc;
 
-  if (CL_rc == -1) return -1;
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, cuda_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+  }
 
   return 0;
 }
 
-//tbd
 int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
 {
-puts ("run_kernel_amp");
-  u64 num_elements = num;
+  device_param->kernel_params_amp_buf64[6] = num;
 
-  device_param->kernel_params_amp_buf64[6] = num_elements;
+  u64 num_elements = num;
 
   const u64 kernel_threads = device_param->kernel_wgs_amp;
 
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+  if (device_param->is_cuda == true)
+  {
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
-  cl_kernel kernel = device_param->opencl_kernel_amp;
+    CUfunction cuda_function = device_param->cuda_function_amp;
 
-  int CL_rc;
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_amp, NULL);
 
-  CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 6, sizeof (cl_ulong), device_param->kernel_params_amp[6]);
+    if (rc_cuLaunchKernel == -1) return -1;
 
-  if (CL_rc == -1) return -1;
+    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
 
-  const size_t global_work_size[3] = { num_elements,    1, 1 };
-  const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+    if (rc_cuCtxSynchronize == -1) return -1;
+  }
 
-  CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  if (device_param->is_opencl == true)
+  {
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-  if (CL_rc == -1) return -1;
+    cl_kernel opencl_kernel = device_param->opencl_kernel_amp;
 
-  CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+    int CL_rc;
 
-  if (CL_rc == -1) return -1;
+    CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, 6, sizeof (cl_ulong), device_param->kernel_params_amp[6]);
 
-  CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+    if (CL_rc == -1) return -1;
 
-  if (CL_rc == -1) return -1;
+    const size_t global_work_size[3] = { num_elements,    1, 1 };
+    const size_t local_work_size[3]  = { kernel_threads,  1, 1 };
+
+    CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+
+    CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
+
+    if (CL_rc == -1) return -1;
+  }
 
   return 0;
 }
diff --git a/src/usage.c b/src/usage.c
index b6d36d379..02cf4b179 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -198,7 +198,7 @@ static const char *const USAGE_BIG_POST_HASHMODES[] =
   "  d | 0123456789",
   "  h | 0123456789abcdef",
   "  H | 0123456789ABCDEF",
-  "  s |  !\"#$%%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+  "  s |  !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
   "  a | ?l?u?d?s",
   "  b | 0x00 - 0xff",
   "",

From d94f582097086c74252f337f4d20209d6e51df68 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 6 May 2019 09:36:07 +0200
Subject: [PATCH 30/73] Replace CEILDIV() with round_up_multiple_64()

---
 src/backend.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index f76344f47..961aa432e 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2880,7 +2880,7 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
   const u64 kernel_threads = device_param->kernel_wgs_atinit;
 
-  num_elements = CEILDIV (num_elements, kernel_threads);
+  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
   CUfunction function = device_param->cuda_function_atinit;
 
@@ -2910,7 +2910,7 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
     u64 num_elements = num16d;
 
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
     CUfunction function = device_param->cuda_function_memset;
 
@@ -3111,7 +3111,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
     if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
     {
@@ -3151,7 +3151,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
         }
       }
 
-      num_elements = CEILDIV (num_elements, kernel_threads);
+      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
       const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
 
@@ -3444,7 +3444,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
                           break;
     }
 
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
     const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, cuda_args, NULL);
 
@@ -3569,7 +3569,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   if (device_param->is_cuda == true)
   {
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
     CUfunction cuda_function = device_param->cuda_function_amp;
 
@@ -3623,7 +3623,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   if (device_param->is_cuda == true)
   {
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
     CUfunction cuda_function = device_param->cuda_function_decompress;
 

From 64c495dfa5361de3c83806c7e282c79ae12fa89f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 6 May 2019 11:23:34 +0200
Subject: [PATCH 31/73] Use CUDA stream for all cuLaunchKernel() invocations

---
 src/backend.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 961aa432e..9a162c150 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2884,13 +2884,13 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
   CUfunction function = device_param->cuda_function_atinit;
 
-  const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_atinit, NULL);
+  const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_atinit, NULL);
 
   if (rc_cuLaunchKernel == -1) return -1;
 
-  const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+  const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-  if (rc_cuCtxSynchronize == -1) return -1;
+  if (rc_cuStreamSynchronize == -1) return -1;
 
   return 0;
 }
@@ -2921,13 +2921,13 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
     //const size_t global_work_size[3] = { num_elements,   1, 1 };
     //const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_memset, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    if (rc_cuCtxSynchronize == -1) return -1;
+    if (rc_cuStreamSynchronize == -1) return -1;
   }
 
   if (num16m)
@@ -3446,13 +3446,13 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, cuda_args, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, cuda_args, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    if (rc_cuCtxSynchronize == -1) return -1;
+    if (rc_cuStreamSynchronize == -1) return -1;
   }
 
   if (device_param->is_opencl == true)
@@ -3525,13 +3525,13 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   {
     CUfunction cuda_function = device_param->cuda_function_tm;
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_tm, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_tm, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    if (rc_cuCtxSynchronize == -1) return -1;
+    if (rc_cuStreamSynchronize == -1) return -1;
   }
 
   if (device_param->is_opencl == true)
@@ -3573,13 +3573,13 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     CUfunction cuda_function = device_param->cuda_function_amp;
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_amp, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_amp, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    if (rc_cuCtxSynchronize == -1) return -1;
+    if (rc_cuStreamSynchronize == -1) return -1;
   }
 
   if (device_param->is_opencl == true)
@@ -3627,13 +3627,13 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
     CUfunction cuda_function = device_param->cuda_function_decompress;
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_decompress, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_decompress, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
-    const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx);
+    const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
-    if (rc_cuCtxSynchronize == -1) return -1;
+    if (rc_cuStreamSynchronize == -1) return -1;
   }
 
   if (device_param->is_opencl == true)
@@ -5387,7 +5387,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       CUcontext cuda_context;
 
-      const int rc_cuCtxCreate = hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
+      const int rc_cuCtxCreate = hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device);
 
       if (rc_cuCtxCreate == -1) return -1;
 
@@ -7062,7 +7062,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
-      CU_rc = hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_YIELD, device_param->cuda_device);
+      CU_rc = hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device);
 
       if (CU_rc == -1) return -1;
     }

From d0bd33c9d131e999a76272dd45910d9d44abbdac Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 6 May 2019 14:34:16 +0200
Subject: [PATCH 32/73] Rename CONSTANT_AS to CONSTANT_VK

---
 OpenCL/inc_cipher_aes.cl        | 20 +++++++--------
 OpenCL/inc_cipher_camellia.cl   |  2 +-
 OpenCL/inc_cipher_des.cl        |  4 +--
 OpenCL/inc_cipher_kuznyechik.cl |  4 +--
 OpenCL/inc_cipher_twofish.cl    |  4 +--
 OpenCL/inc_common.cl            | 32 +++++++++++++++++------
 OpenCL/inc_hash_sha224.cl       |  2 +-
 OpenCL/inc_hash_sha256.cl       |  2 +-
 OpenCL/inc_hash_sha384.cl       |  2 +-
 OpenCL/inc_hash_sha512.cl       |  2 +-
 OpenCL/inc_hash_streebog256.cl  |  4 +--
 OpenCL/inc_hash_streebog512.cl  |  4 +--
 OpenCL/inc_hash_whirlpool.cl    |  8 +++---
 OpenCL/inc_platform.cl          | 40 +++++++++++++++++++++++++++++
 OpenCL/inc_platform.h           | 11 +++++++-
 OpenCL/inc_truecrypt_crc32.cl   |  2 +-
 OpenCL/inc_types.h              | 45 ++++++++++++++++++++++++++++++++-
 OpenCL/inc_vendor.h             |  3 +++
 OpenCL/m01500_a0-pure.cl        |  4 +--
 OpenCL/m01500_a1-pure.cl        |  4 +--
 OpenCL/m03000_a0-pure.cl        |  4 +--
 OpenCL/m03000_a1-pure.cl        |  4 +--
 OpenCL/m03200-pure.cl           |  8 +++---
 OpenCL/m05500_a0-optimized.cl   |  4 +--
 OpenCL/m05500_a0-pure.cl        |  4 +--
 OpenCL/m05500_a1-optimized.cl   |  4 +--
 OpenCL/m05500_a1-pure.cl        |  4 +--
 OpenCL/m05500_a3-optimized.cl   |  4 +--
 OpenCL/m05500_a3-pure.cl        |  4 +--
 OpenCL/m05800-optimized.cl      |  4 +--
 OpenCL/m05800-pure.cl           |  4 +--
 OpenCL/m06900_a0-optimized.cl   |  2 +-
 OpenCL/m06900_a1-optimized.cl   |  2 +-
 OpenCL/m06900_a3-optimized.cl   |  2 +-
 OpenCL/m07700_a0-optimized.cl   |  4 +--
 OpenCL/m07700_a1-optimized.cl   |  4 +--
 OpenCL/m07700_a3-optimized.cl   |  4 +--
 OpenCL/m07701_a0-optimized.cl   |  4 +--
 OpenCL/m07701_a1-optimized.cl   |  4 +--
 OpenCL/m07701_a3-optimized.cl   |  4 +--
 OpenCL/m07800_a0-optimized.cl   |  2 +-
 OpenCL/m07800_a1-optimized.cl   |  2 +-
 OpenCL/m07800_a3-optimized.cl   |  2 +-
 OpenCL/m07801_a0-optimized.cl   |  2 +-
 OpenCL/m07801_a1-optimized.cl   |  2 +-
 OpenCL/m07801_a3-optimized.cl   |  2 +-
 OpenCL/m08500_a0-pure.cl        |  6 ++---
 OpenCL/m08500_a1-pure.cl        |  6 ++---
 OpenCL/m08500_a3-pure.cl        |  6 ++---
 OpenCL/m08600_a0-pure.cl        |  2 +-
 OpenCL/m08600_a1-pure.cl        |  2 +-
 OpenCL/m08600_a3-pure.cl        |  2 +-
 OpenCL/m08700_a0-optimized.cl   |  2 +-
 OpenCL/m08700_a1-optimized.cl   |  2 +-
 OpenCL/m08700_a3-optimized.cl   |  2 +-
 OpenCL/m09000-pure.cl           | 10 ++++----
 OpenCL/m09100-pure.cl           |  4 +--
 OpenCL/m10400_a0-optimized.cl   |  2 +-
 OpenCL/m10400_a1-optimized.cl   |  2 +-
 OpenCL/m10400_a3-optimized.cl   |  2 +-
 OpenCL/m10410_a0-optimized.cl   |  2 +-
 OpenCL/m10410_a1-optimized.cl   |  2 +-
 OpenCL/m10410_a3-optimized.cl   |  2 +-
 OpenCL/m10420_a0-optimized.cl   |  2 +-
 OpenCL/m10420_a1-optimized.cl   |  2 +-
 OpenCL/m10420_a3-optimized.cl   |  2 +-
 OpenCL/m10500-pure.cl           |  2 +-
 OpenCL/m11500_a0-optimized.cl   |  2 +-
 OpenCL/m11500_a1-optimized.cl   |  2 +-
 OpenCL/m11500_a3-optimized.cl   |  2 +-
 OpenCL/m12400-pure.cl           |  4 +--
 OpenCL/m14000_a0-pure.cl        |  4 +--
 OpenCL/m14000_a1-pure.cl        |  4 +--
 OpenCL/m14100_a0-pure.cl        |  4 +--
 OpenCL/m14100_a1-pure.cl        |  4 +--
 OpenCL/m14100_a3-pure.cl        |  4 +--
 OpenCL/m14900_a0-optimized.cl   |  2 +-
 OpenCL/m14900_a1-optimized.cl   |  2 +-
 OpenCL/m14900_a3-optimized.cl   |  2 +-
 OpenCL/m15600-pure.cl           |  2 +-
 OpenCL/m15700-pure.cl           |  2 +-
 OpenCL/m16000_a0-pure.cl        |  6 ++---
 OpenCL/m16000_a1-pure.cl        |  6 ++---
 OpenCL/m16000_a3-pure.cl        |  6 ++---
 OpenCL/m16300-pure.cl           |  2 +-
 OpenCL/m17300_a0-optimized.cl   |  2 +-
 OpenCL/m17300_a1-optimized.cl   |  2 +-
 OpenCL/m17300_a3-optimized.cl   |  2 +-
 OpenCL/m17400_a0-optimized.cl   |  2 +-
 OpenCL/m17400_a1-optimized.cl   |  2 +-
 OpenCL/m17400_a3-optimized.cl   |  2 +-
 OpenCL/m17500_a0-optimized.cl   |  2 +-
 OpenCL/m17500_a1-optimized.cl   |  2 +-
 OpenCL/m17500_a3-optimized.cl   |  2 +-
 OpenCL/m17600_a0-optimized.cl   |  2 +-
 OpenCL/m17600_a1-optimized.cl   |  2 +-
 OpenCL/m17600_a3-optimized.cl   |  2 +-
 OpenCL/m17700_a0-optimized.cl   |  2 +-
 OpenCL/m17700_a1-optimized.cl   |  2 +-
 OpenCL/m17700_a3-optimized.cl   |  2 +-
 OpenCL/m17800_a0-optimized.cl   |  2 +-
 OpenCL/m17800_a1-optimized.cl   |  2 +-
 OpenCL/m17800_a3-optimized.cl   |  2 +-
 OpenCL/m17900_a0-optimized.cl   |  2 +-
 OpenCL/m17900_a1-optimized.cl   |  2 +-
 OpenCL/m17900_a3-optimized.cl   |  2 +-
 OpenCL/m18000_a0-optimized.cl   |  2 +-
 OpenCL/m18000_a1-optimized.cl   |  2 +-
 OpenCL/m18000_a3-optimized.cl   |  2 +-
 OpenCL/m18600-pure.cl           | 10 ++++----
 src/backend.c                   |  8 +++---
 111 files changed, 295 insertions(+), 184 deletions(-)

diff --git a/OpenCL/inc_cipher_aes.cl b/OpenCL/inc_cipher_aes.cl
index 8425414cd..716addc39 100644
--- a/OpenCL/inc_cipher_aes.cl
+++ b/OpenCL/inc_cipher_aes.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_cipher_aes.h"
 
-CONSTANT_AS u32a te0[256] =
+CONSTANT_VK u32a te0[256] =
 {
   0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
   0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
@@ -77,7 +77,7 @@ CONSTANT_AS u32a te0[256] =
   0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
 };
 
-CONSTANT_AS u32a te1[256] =
+CONSTANT_VK u32a te1[256] =
 {
   0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
   0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
@@ -145,7 +145,7 @@ CONSTANT_AS u32a te1[256] =
   0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
 };
 
-CONSTANT_AS u32a te2[256] =
+CONSTANT_VK u32a te2[256] =
 {
   0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b,
   0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
@@ -213,7 +213,7 @@ CONSTANT_AS u32a te2[256] =
   0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
 };
 
-CONSTANT_AS u32a te3[256] =
+CONSTANT_VK u32a te3[256] =
 {
   0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6,
   0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
@@ -281,7 +281,7 @@ CONSTANT_AS u32a te3[256] =
   0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
 };
 
-CONSTANT_AS u32a te4[256] =
+CONSTANT_VK u32a te4[256] =
 {
   0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b,
   0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5,
@@ -349,7 +349,7 @@ CONSTANT_AS u32a te4[256] =
   0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616,
 };
 
-CONSTANT_AS u32a td0[256] =
+CONSTANT_VK u32a td0[256] =
 {
   0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
   0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
@@ -417,7 +417,7 @@ CONSTANT_AS u32a td0[256] =
   0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
 };
 
-CONSTANT_AS u32a td1[256] =
+CONSTANT_VK u32a td1[256] =
 {
   0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
   0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
@@ -485,7 +485,7 @@ CONSTANT_AS u32a td1[256] =
   0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
 };
 
-CONSTANT_AS u32a td2[256] =
+CONSTANT_VK u32a td2[256] =
 {
   0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27,
   0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
@@ -553,7 +553,7 @@ CONSTANT_AS u32a td2[256] =
   0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
 };
 
-CONSTANT_AS u32a td3[256] =
+CONSTANT_VK u32a td3[256] =
 {
   0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a,
   0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
@@ -621,7 +621,7 @@ CONSTANT_AS u32a td3[256] =
   0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
 };
 
-CONSTANT_AS u32a td4[256] =
+CONSTANT_VK u32a td4[256] =
 {
   0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5,
   0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838,
diff --git a/OpenCL/inc_cipher_camellia.cl b/OpenCL/inc_cipher_camellia.cl
index 881e541bf..08c73e6f4 100644
--- a/OpenCL/inc_cipher_camellia.cl
+++ b/OpenCL/inc_cipher_camellia.cl
@@ -21,7 +21,7 @@
 #include "inc_common.h"
 #include "inc_cipher_camellia.h"
 
-CONSTANT_AS u32a c_sbox[256] =
+CONSTANT_VK u32a c_sbox[256] =
 {
   0x70, 0x82, 0x2c, 0xec, 0xb3, 0x27, 0xc0, 0xe5,
   0xe4, 0x85, 0x57, 0x35, 0xea, 0x0c, 0xae, 0x41,
diff --git a/OpenCL/inc_cipher_des.cl b/OpenCL/inc_cipher_des.cl
index a90d5788b..b247e1e22 100644
--- a/OpenCL/inc_cipher_des.cl
+++ b/OpenCL/inc_cipher_des.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_cipher_des.h"
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     /* nibble 0 */
@@ -165,7 +165,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/inc_cipher_kuznyechik.cl b/OpenCL/inc_cipher_kuznyechik.cl
index 2c3792b96..a5768f70f 100644
--- a/OpenCL/inc_cipher_kuznyechik.cl
+++ b/OpenCL/inc_cipher_kuznyechik.cl
@@ -18,7 +18,7 @@
 #include "inc_common.h"
 #include "inc_cipher_kuznyechik.h"
 
-CONSTANT_AS u32a k_sbox[256] =
+CONSTANT_VK u32a k_sbox[256] =
 {
   0xfc, 0xee, 0xdd, 0x11, 0xcf, 0x6e, 0x31, 0x16,
   0xfb, 0xc4, 0xfa, 0xda, 0x23, 0xc5, 0x04, 0x4d,
@@ -54,7 +54,7 @@ CONSTANT_AS u32a k_sbox[256] =
   0xd1, 0x66, 0xaf, 0xc2, 0x39, 0x4b, 0x63, 0xb6
 };
 
-CONSTANT_AS u32a k_sbox_inv[256] =
+CONSTANT_VK u32a k_sbox_inv[256] =
 {
   0xa5, 0x2d, 0x32, 0x8f, 0x0e, 0x30, 0x38, 0xc0,
   0x54, 0xe6, 0x9e, 0x39, 0x55, 0x7e, 0x52, 0x91,
diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl
index 90b6cf600..dc0461b24 100644
--- a/OpenCL/inc_cipher_twofish.cl
+++ b/OpenCL/inc_cipher_twofish.cl
@@ -25,7 +25,7 @@
 #include "inc_common.h"
 #include "inc_cipher_twofish.h"
 
-CONSTANT_AS u32a q_tab[2][256] =
+CONSTANT_VK u32a q_tab[2][256] =
 {
   {
     0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
@@ -77,7 +77,7 @@ CONSTANT_AS u32a q_tab[2][256] =
   }
 };
 
-CONSTANT_AS u32a  m_tab[4][256] =
+CONSTANT_VK u32a  m_tab[4][256] =
 {
   { 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
     0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 844e4ba12..68fdf0b88 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -305,7 +305,9 @@ DECLSPEC u64x hl32_to_64 (const u32x a, const u32x b)
 
 DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
+  return rotl32 (a, n);
+  #elif defined IS_CUDA
   return rotl32 (a, n);
   #else
   return rotate (a, (u32x) (n));
@@ -314,7 +316,9 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
 
 DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
+  return rotr32 (a, n);
+  #elif defined IS_CUDA
   return rotr32 (a, n);
   #else
   return rotate (a, (u32x) (32 - n));
@@ -323,8 +327,10 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
 
 DECLSPEC u32 hc_rotl32_S (const u32 a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
   return rotl32 (a, n);
+  #elif defined IS_CUDA
+  return rotl32_S (a, n);
   #else
   return rotate (a, (u32) (n));
   #endif
@@ -332,8 +338,10 @@ DECLSPEC u32 hc_rotl32_S (const u32 a, const int n)
 
 DECLSPEC u32 hc_rotr32_S (const u32 a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
   return rotr32 (a, n);
+  #elif defined IS_CUDA
+  return rotr32_S (a, n);
   #else
   return rotate (a, (u32) (32 - n));
   #endif
@@ -341,7 +349,9 @@ DECLSPEC u32 hc_rotr32_S (const u32 a, const int n)
 
 DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
+  return rotl64 (a, n);
+  #elif defined IS_CUDA
   return rotl64 (a, n);
   #else
   return rotate (a, (u64x) (n));
@@ -350,7 +360,9 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
 
 DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
+  return rotr64 (a, n);
+  #elif defined IS_CUDA
   return rotr64 (a, n);
   #else
   return rotate (a, (u64x) (64 - n));
@@ -359,8 +371,10 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
 
 DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
   return rotl64 (a, n);
+  #elif defined IS_CUDA
+  return rotl64_S (a, n);
   #else
   return rotate (a, (u64) (n));
   #endif
@@ -368,8 +382,10 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
 
 DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
 {
-  #ifdef _CPU_OPENCL_EMU_H
+  #if   defined _CPU_OPENCL_EMU_H
   return rotr64 (a, n);
+  #elif defined IS_CUDA
+  return rotr64_S (a, n);
   #else
   return rotate (a, (u64) (64 - n));
   #endif
diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index 0758b1f54..e93206e44 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_sha224.h"
 
-CONSTANT_AS u32a k_sha224[64] =
+CONSTANT_VK u32a k_sha224[64] =
 {
   SHA224C00, SHA224C01, SHA224C02, SHA224C03,
   SHA224C04, SHA224C05, SHA224C06, SHA224C07,
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index f52e551da..de2bd5897 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_sha256.h"
 
-CONSTANT_AS u32a k_sha256[64] =
+CONSTANT_VK u32a k_sha256[64] =
 {
   SHA256C00, SHA256C01, SHA256C02, SHA256C03,
   SHA256C04, SHA256C05, SHA256C06, SHA256C07,
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index cdfa357ca..ea26ec734 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_sha384.h"
 
-CONSTANT_AS u64a k_sha384[80] =
+CONSTANT_VK u64a k_sha384[80] =
 {
   SHA512C00, SHA512C01, SHA512C02, SHA512C03,
   SHA512C04, SHA512C05, SHA512C06, SHA512C07,
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index 9ea5463d8..783a66fbe 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_sha512.h"
 
-CONSTANT_AS u64a k_sha512[80] =
+CONSTANT_VK u64a k_sha512[80] =
 {
   SHA512C00, SHA512C01, SHA512C02, SHA512C03,
   SHA512C04, SHA512C05, SHA512C06, SHA512C07,
diff --git a/OpenCL/inc_hash_streebog256.cl b/OpenCL/inc_hash_streebog256.cl
index 58900e7d7..d97926000 100644
--- a/OpenCL/inc_hash_streebog256.cl
+++ b/OpenCL/inc_hash_streebog256.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_streebog256.h"
 
-CONSTANT_AS u64a sbob256_sl64[8][256] =
+CONSTANT_VK u64a sbob256_sl64[8][256] =
 {
   {
     0xd031c397ce553fe6, 0x16ba5b01b006b525, 0xa89bade6296e70c8, 0x6a1f525d77d3435b,
@@ -541,7 +541,7 @@ CONSTANT_AS u64a sbob256_sl64[8][256] =
   },
 };
 
-CONSTANT_AS u64a sbob256_rc64[12][8] =
+CONSTANT_VK u64a sbob256_rc64[12][8] =
 {
   {
     0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71,
diff --git a/OpenCL/inc_hash_streebog512.cl b/OpenCL/inc_hash_streebog512.cl
index 7ad416ade..fe86b1822 100644
--- a/OpenCL/inc_hash_streebog512.cl
+++ b/OpenCL/inc_hash_streebog512.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_streebog512.h"
 
-CONSTANT_AS u64a sbob512_sl64[8][256] =
+CONSTANT_VK u64a sbob512_sl64[8][256] =
 {
   {
     0xd031c397ce553fe6, 0x16ba5b01b006b525, 0xa89bade6296e70c8, 0x6a1f525d77d3435b,
@@ -541,7 +541,7 @@ CONSTANT_AS u64a sbob512_sl64[8][256] =
   },
 };
 
-CONSTANT_AS u64a sbob512_rc64[12][8] =
+CONSTANT_VK u64a sbob512_rc64[12][8] =
 {
   {
     0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71,
diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl
index d4e1206c4..b4933e6f7 100644
--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_hash_whirlpool.h"
 
-CONSTANT_AS u32a Ch[8][256] =
+CONSTANT_VK u32a Ch[8][256] =
 {
   {
     0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8,
@@ -541,7 +541,7 @@ CONSTANT_AS u32a Ch[8][256] =
   }
 };
 
-CONSTANT_AS u32a Cl[8][256] =
+CONSTANT_VK u32a Cl[8][256] =
 {
   {
     0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb,
@@ -1073,7 +1073,7 @@ CONSTANT_AS u32a Cl[8][256] =
   },
 };
 
-CONSTANT_AS u32a rch[R + 1] =
+CONSTANT_VK u32a rch[R + 1] =
 {
   0x00000000,
   0x1823c6e8,
@@ -1088,7 +1088,7 @@ CONSTANT_AS u32a rch[R + 1] =
   0xca2dbf07,
 };
 
-CONSTANT_AS u32a rcl[R + 1] =
+CONSTANT_VK u32a rcl[R + 1] =
 {
   0x00000000,
   0x87b8014f,
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 1dc643173..e5924dd13 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -44,6 +44,46 @@ DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
   return blockDim.x;
 }
 
+DECLSPEC u32x rotl32 (const u32x a, const int n) // rotate a left by n bits (lane-wise for vector u32x); n is taken modulo 32
+{
+  return ((a << (n & 31)) | ((a >> ((32 - n) & 31)))); // both counts masked to 0..31 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u32x rotr32 (const u32x a, const int n) // rotate a right by n bits (lane-wise for vector u32x); n is taken modulo 32
+{
+  return ((a >> (n & 31)) | ((a << ((32 - n) & 31)))); // both counts masked to 0..31 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u32 rotl32_S (const u32 a, const int n) // scalar variant: rotate a left by n bits; n is taken modulo 32
+{
+  return ((a << (n & 31)) | ((a >> ((32 - n) & 31)))); // both counts masked to 0..31 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u32 rotr32_S (const u32 a, const int n) // scalar variant: rotate a right by n bits; n is taken modulo 32
+{
+  return ((a >> (n & 31)) | ((a << ((32 - n) & 31)))); // both counts masked to 0..31 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u64x rotl64 (const u64x a, const int n) // rotate a left by n bits (lane-wise for vector u64x); n is taken modulo 64
+{
+  return ((a << (n & 63)) | ((a >> ((64 - n) & 63)))); // both counts masked to 0..63 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u64x rotr64 (const u64x a, const int n) // rotate a right by n bits (lane-wise for vector u64x); n is taken modulo 64
+{
+  return ((a >> (n & 63)) | ((a << ((64 - n) & 63)))); // both counts masked to 0..63 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u64 rotl64_S (const u64 a, const int n) // scalar variant: rotate a left by n bits; n is taken modulo 64
+{
+  return ((a << (n & 63)) | ((a >> ((64 - n) & 63)))); // both counts masked to 0..63 so n == 0 never performs a full-width (undefined) shift
+}
+
+DECLSPEC u64 rotr64_S (const u64 a, const int n) // scalar variant: rotate a right by n bits; n is taken modulo 64
+{
+  return ((a >> (n & 63)) | ((a << ((64 - n) & 63)))); // both counts masked to 0..63 so n == 0 never performs a full-width (undefined) shift
+}
+
 #define SYNC_THREADS() __syncthreads ()
 #endif
 
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index a8ce27fef..1055838c0 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -14,7 +14,16 @@ DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
 
-#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
+DECLSPEC u32x hc_rotl32   (const u32x a, const int n);
+DECLSPEC u32x hc_rotr32   (const u32x a, const int n);
+DECLSPEC u32  hc_rotl32_S (const u32  a, const int n);
+DECLSPEC u32  hc_rotr32_S (const u32  a, const int n);
+DECLSPEC u64x hc_rotl64   (const u64x a, const int n);
+DECLSPEC u64x hc_rotr64   (const u64x a, const int n);
+DECLSPEC u64  hc_rotl64_S (const u64  a, const int n);
+DECLSPEC u64  hc_rotr64_S (const u64  a, const int n);
+
+//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
 #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
 #endif
 
diff --git a/OpenCL/inc_truecrypt_crc32.cl b/OpenCL/inc_truecrypt_crc32.cl
index 391ec91d0..2bc30bffa 100644
--- a/OpenCL/inc_truecrypt_crc32.cl
+++ b/OpenCL/inc_truecrypt_crc32.cl
@@ -9,7 +9,7 @@
 #include "inc_common.h"
 #include "inc_truecrypt_crc32.h"
 
-CONSTANT_AS u32a crc32tab[0x100] =
+CONSTANT_VK u32a crc32tab[0x100] =
 {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 77997dd97..d1c1f4498 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -368,7 +368,6 @@ inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~
 
 #if VECT_SIZE == 8
 
-
 class u8x
 {
   private:
@@ -497,6 +496,50 @@ inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((
 
 inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
 
+inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)   ); } // true only when EVERY lane differs from b (per-lane tests are AND-ed); NOTE(review): confirm callers expect all-lanes rather than any-lane semantics
+inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); } // true only when EVERY lane of a differs from the matching lane of b (per-lane tests are AND-ed)
+
+inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;     }
+inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7;  }
+
+inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;     }
+inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7;  }
+
+inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;     }
+inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7;  }
+
+inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;     }
+inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7;  }
+
+inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
+inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
+
+inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
+inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
+
+inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b)   );  }
+inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7));  }
+
+inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b)   );  }
+inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7));  }
+
+inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b)   );  }
+inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7));  }
+
+inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b)   );  }
+inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7));  }
+
+inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b)   );  }
+inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7));  }
+
+inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b)   );  }
+inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7));  }
+
+inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b)   );  }
+inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7));  }
+
+inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
+
 #endif
 
 #if VECT_SIZE == 16
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index f69e0573b..279c87c02 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -15,16 +15,19 @@
 #endif
 
 #if defined IS_NATIVE
+#define CONSTANT_VK
 #define CONSTANT_AS
 #define GLOBAL_AS
 #define LOCAL_AS
 #define KERNEL_FQ
 #elif defined IS_CUDA
+#define CONSTANT_VK __constant__
 #define CONSTANT_AS
 #define GLOBAL_AS
 #define LOCAL_AS
 #define KERNEL_FQ   extern "C" __global__
 #elif defined IS_OPENCL
+#define CONSTANT_VK __constant
 #define CONSTANT_AS __constant
 #define GLOBAL_AS   __global
 #define LOCAL_AS    __local
diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl
index beabe09f9..944f04de0 100644
--- a/OpenCL/m01500_a0-pure.cl
+++ b/OpenCL/m01500_a0-pure.cl
@@ -35,7 +35,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -183,7 +183,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl
index 3d9f06e01..3bcf6344f 100644
--- a/OpenCL/m01500_a1-pure.cl
+++ b/OpenCL/m01500_a1-pure.cl
@@ -33,7 +33,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -181,7 +181,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl
index 96ef208e2..a2eb1ad60 100644
--- a/OpenCL/m03000_a0-pure.cl
+++ b/OpenCL/m03000_a0-pure.cl
@@ -35,7 +35,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -183,7 +183,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl
index d0e7f6b9f..92665dda4 100644
--- a/OpenCL/m03000_a1-pure.cl
+++ b/OpenCL/m03000_a1-pure.cl
@@ -33,7 +33,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -181,7 +181,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl
index 4b7c6c79d..938cb1f48 100644
--- a/OpenCL/m03200-pure.cl
+++ b/OpenCL/m03200-pure.cl
@@ -28,7 +28,7 @@ typedef struct bcrypt_tmp
 
 // http://www.schneier.com/code/constants.txt
 
-CONSTANT_AS u32a c_sbox0[256] =
+CONSTANT_VK u32a c_sbox0[256] =
 {
   0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
   0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
@@ -96,7 +96,7 @@ CONSTANT_AS u32a c_sbox0[256] =
   0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
 };
 
-CONSTANT_AS u32a c_sbox1[256] =
+CONSTANT_VK u32a c_sbox1[256] =
 {
   0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
   0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
@@ -164,7 +164,7 @@ CONSTANT_AS u32a c_sbox1[256] =
   0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
 };
 
-CONSTANT_AS u32a c_sbox2[256] =
+CONSTANT_VK u32a c_sbox2[256] =
 {
   0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
   0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
@@ -232,7 +232,7 @@ CONSTANT_AS u32a c_sbox2[256] =
   0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
 };
 
-CONSTANT_AS u32a c_sbox3[256] =
+CONSTANT_VK u32a c_sbox3[256] =
 {
   0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
   0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl
index f9fd1e11a..893cbc2c1 100644
--- a/OpenCL/m05500_a0-optimized.cl
+++ b/OpenCL/m05500_a0-optimized.cl
@@ -48,7 +48,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -196,7 +196,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl
index aaef0fbf9..e2087f918 100644
--- a/OpenCL/m05500_a0-pure.cl
+++ b/OpenCL/m05500_a0-pure.cl
@@ -48,7 +48,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -196,7 +196,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl
index a1f12ff86..2f4a8a912 100644
--- a/OpenCL/m05500_a1-optimized.cl
+++ b/OpenCL/m05500_a1-optimized.cl
@@ -46,7 +46,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -194,7 +194,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl
index 3e482266c..6872de6cc 100644
--- a/OpenCL/m05500_a1-pure.cl
+++ b/OpenCL/m05500_a1-pure.cl
@@ -46,7 +46,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -194,7 +194,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl
index ec25450db..9901e818d 100644
--- a/OpenCL/m05500_a3-optimized.cl
+++ b/OpenCL/m05500_a3-optimized.cl
@@ -46,7 +46,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -194,7 +194,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl
index eca392158..467818f9f 100644
--- a/OpenCL/m05500_a3-pure.cl
+++ b/OpenCL/m05500_a3-pure.cl
@@ -46,7 +46,7 @@ typedef struct netntlm
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -194,7 +194,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index 658a9f93e..4d013411c 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -20,7 +20,7 @@ typedef struct androidpin_tmp
 
 } androidpin_tmp_t;
 
-CONSTANT_AS u32a c_pc_dec[1024] =
+CONSTANT_VK u32a c_pc_dec[1024] =
 {
   0x00000030,
   0x00000031,
@@ -1048,7 +1048,7 @@ CONSTANT_AS u32a c_pc_dec[1024] =
   0x33323031,
 };
 
-CONSTANT_AS u32a c_pc_len[1024] =
+CONSTANT_VK u32a c_pc_len[1024] =
 {
   1,
   1,
diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl
index e7cddac46..1813576b7 100644
--- a/OpenCL/m05800-pure.cl
+++ b/OpenCL/m05800-pure.cl
@@ -20,7 +20,7 @@ typedef struct androidpin_tmp
 
 } androidpin_tmp_t;
 
-CONSTANT_AS u32a c_pc_dec[1024] =
+CONSTANT_VK u32a c_pc_dec[1024] =
 {
   0x00000030,
   0x00000031,
@@ -1048,7 +1048,7 @@ CONSTANT_AS u32a c_pc_dec[1024] =
   0x33323031,
 };
 
-CONSTANT_AS u32a c_pc_len[1024] =
+CONSTANT_VK u32a c_pc_len[1024] =
 {
   1,
   1,
diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl
index 07939b450..5c3ef3850 100644
--- a/OpenCL/m06900_a0-optimized.cl
+++ b/OpenCL/m06900_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a c_tables[4][256] =
+CONSTANT_VK u32a c_tables[4][256] =
 {
   {
     0x00072000, 0x00075000, 0x00074800, 0x00071000,
diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl
index 7cf40f6cf..41cc12653 100644
--- a/OpenCL/m06900_a1-optimized.cl
+++ b/OpenCL/m06900_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a c_tables[4][256] =
+CONSTANT_VK u32a c_tables[4][256] =
 {
   {
     0x00072000, 0x00075000, 0x00074800, 0x00071000,
diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl
index abfcda648..4f201a4c1 100644
--- a/OpenCL/m06900_a3-optimized.cl
+++ b/OpenCL/m06900_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a c_tables[4][256] =
+CONSTANT_VK u32a c_tables[4][256] =
 {
   {
     0x00072000, 0x00075000, 0x00074800, 0x00071000,
diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl
index 824307804..2c633028b 100644
--- a/OpenCL/m07700_a0-optimized.cl
+++ b/OpenCL/m07700_a0-optimized.cl
@@ -29,7 +29,7 @@
   (a)[((n)/4)+1]  = x >> 32;        \
 }
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -50,7 +50,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl
index 46877fd70..525d9c257 100644
--- a/OpenCL/m07700_a1-optimized.cl
+++ b/OpenCL/m07700_a1-optimized.cl
@@ -27,7 +27,7 @@
   (a)[((n)/4)+1]  = x >> 32;        \
 }
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -48,7 +48,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl
index da3865e91..78927041b 100644
--- a/OpenCL/m07700_a3-optimized.cl
+++ b/OpenCL/m07700_a3-optimized.cl
@@ -18,7 +18,7 @@
 #define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
 #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -39,7 +39,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl
index e20c9c505..99fbb8183 100644
--- a/OpenCL/m07701_a0-optimized.cl
+++ b/OpenCL/m07701_a0-optimized.cl
@@ -29,7 +29,7 @@
   (a)[((n)/4)+1]  = x >> 32;        \
 }
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -50,7 +50,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl
index e403114ab..04609d8e0 100644
--- a/OpenCL/m07701_a1-optimized.cl
+++ b/OpenCL/m07701_a1-optimized.cl
@@ -27,7 +27,7 @@
   (a)[((n)/4)+1]  = x >> 32;        \
 }
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -48,7 +48,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl
index 5dad95087..506ca491f 100644
--- a/OpenCL/m07701_a3-optimized.cl
+++ b/OpenCL/m07701_a3-optimized.cl
@@ -18,7 +18,7 @@
 #define GETCHAR(a,p)  (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
 #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
 
-CONSTANT_AS u32a sapb_trans_tbl[256] =
+CONSTANT_VK u32a sapb_trans_tbl[256] =
 {
   // first value hack for 0 byte as part of an optimization
   0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -39,7 +39,7 @@ CONSTANT_AS u32a sapb_trans_tbl[256] =
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
 
-CONSTANT_AS u32a bcodeArray[48] =
+CONSTANT_VK u32a bcodeArray[48] =
 {
   0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91,
   0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51,
diff --git a/OpenCL/m07800_a0-optimized.cl b/OpenCL/m07800_a0-optimized.cl
index 720d68ad0..4d86b2858 100644
--- a/OpenCL/m07800_a0-optimized.cl
+++ b/OpenCL/m07800_a0-optimized.cl
@@ -17,7 +17,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m07800_a1-optimized.cl b/OpenCL/m07800_a1-optimized.cl
index b2c8358fb..cc552143f 100644
--- a/OpenCL/m07800_a1-optimized.cl
+++ b/OpenCL/m07800_a1-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m07800_a3-optimized.cl b/OpenCL/m07800_a3-optimized.cl
index 55065786d..de22475f0 100644
--- a/OpenCL/m07800_a3-optimized.cl
+++ b/OpenCL/m07800_a3-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m07801_a0-optimized.cl b/OpenCL/m07801_a0-optimized.cl
index 0ebdf2e9a..aeb9b95d0 100644
--- a/OpenCL/m07801_a0-optimized.cl
+++ b/OpenCL/m07801_a0-optimized.cl
@@ -17,7 +17,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m07801_a1-optimized.cl b/OpenCL/m07801_a1-optimized.cl
index 60829f3f7..8d055619c 100644
--- a/OpenCL/m07801_a1-optimized.cl
+++ b/OpenCL/m07801_a1-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl
index 0a346ca8d..7de9f2a11 100644
--- a/OpenCL/m07801_a3-optimized.cl
+++ b/OpenCL/m07801_a3-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_sha1.cl"
 #endif
 
-CONSTANT_AS u32a theMagicArray[64] =
+CONSTANT_VK u32a theMagicArray[64] =
 {
   0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f,
   0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194,
diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl
index 69c768de3..e64afc6cc 100644
--- a/OpenCL/m08500_a0-pure.cl
+++ b/OpenCL/m08500_a0-pure.cl
@@ -53,7 +53,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
+CONSTANT_VK u32a c_ascii_to_ebcdic_pc[256] =
 {
   // little hack, can't crack 0-bytes in password, but who cares
   //    0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5,
@@ -75,7 +75,7 @@ CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
   0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54,
 };
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -223,7 +223,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl
index 82df98f55..1a4cf0a23 100644
--- a/OpenCL/m08500_a1-pure.cl
+++ b/OpenCL/m08500_a1-pure.cl
@@ -51,7 +51,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
+CONSTANT_VK u32a c_ascii_to_ebcdic_pc[256] =
 {
   // little hack, can't crack 0-bytes in password, but who cares
   //    0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5,
@@ -73,7 +73,7 @@ CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
   0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54,
 };
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -221,7 +221,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl
index 277295e7d..2ddb4efc8 100644
--- a/OpenCL/m08500_a3-pure.cl
+++ b/OpenCL/m08500_a3-pure.cl
@@ -51,7 +51,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
+CONSTANT_VK u32a c_ascii_to_ebcdic_pc[256] =
 {
   // little hack, can't crack 0-bytes in password, but who cares
   //    0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5,
@@ -73,7 +73,7 @@ CONSTANT_AS u32a c_ascii_to_ebcdic_pc[256] =
   0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54,
 };
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -221,7 +221,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl
index 22ac6cf46..04c9a3594 100644
--- a/OpenCL/m08600_a0-pure.cl
+++ b/OpenCL/m08600_a0-pure.cl
@@ -16,7 +16,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl
index 859278a54..bb081605d 100644
--- a/OpenCL/m08600_a1-pure.cl
+++ b/OpenCL/m08600_a1-pure.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl
index da94d4a3c..b7b2f29df 100644
--- a/OpenCL/m08600_a3-pure.cl
+++ b/OpenCL/m08600_a3-pure.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl
index 022281919..add1bba63 100644
--- a/OpenCL/m08700_a0-optimized.cl
+++ b/OpenCL/m08700_a0-optimized.cl
@@ -16,7 +16,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl
index 94968476f..a8fc8adf3 100644
--- a/OpenCL/m08700_a1-optimized.cl
+++ b/OpenCL/m08700_a1-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl
index 694a58886..07eaf035c 100644
--- a/OpenCL/m08700_a3-optimized.cl
+++ b/OpenCL/m08700_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl
index ec34faab2..e04a8594b 100644
--- a/OpenCL/m09000-pure.cl
+++ b/OpenCL/m09000-pure.cl
@@ -29,7 +29,7 @@ typedef struct pwsafe2_tmp
 
 // http://www.schneier.com/code/constants.txt
 
-CONSTANT_AS u32a c_sbox0[256] =
+CONSTANT_VK u32a c_sbox0[256] =
 {
   0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
   0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
@@ -97,7 +97,7 @@ CONSTANT_AS u32a c_sbox0[256] =
   0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
 };
 
-CONSTANT_AS u32a c_sbox1[256] =
+CONSTANT_VK u32a c_sbox1[256] =
 {
   0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
   0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
@@ -165,7 +165,7 @@ CONSTANT_AS u32a c_sbox1[256] =
   0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
 };
 
-CONSTANT_AS u32a c_sbox2[256] =
+CONSTANT_VK u32a c_sbox2[256] =
 {
   0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
   0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
@@ -233,7 +233,7 @@ CONSTANT_AS u32a c_sbox2[256] =
   0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
 };
 
-CONSTANT_AS u32a c_sbox3[256] =
+CONSTANT_VK u32a c_sbox3[256] =
 {
   0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
   0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
@@ -301,7 +301,7 @@ CONSTANT_AS u32a c_sbox3[256] =
   0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
 };
 
-CONSTANT_AS u32a c_pbox[18] =
+CONSTANT_VK u32a c_pbox[18] =
 {
   0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
   0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl
index 51ae77536..004bb3e85 100644
--- a/OpenCL/m09100-pure.cl
+++ b/OpenCL/m09100-pure.cl
@@ -27,7 +27,7 @@ typedef struct lotus8_tmp
 
 } lotus8_tmp_t;
 
-CONSTANT_AS u32a lotus64_table[64] =
+CONSTANT_VK u32a lotus64_table[64] =
 {
   '0', '1', '2', '3', '4', '5', '6', '7',
   '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
@@ -39,7 +39,7 @@ CONSTANT_AS u32a lotus64_table[64] =
   'u', 'v', 'w', 'x', 'y', 'z', '+', '/',
 };
 
-CONSTANT_AS u32a lotus_magic_table[256] =
+CONSTANT_VK u32a lotus_magic_table[256] =
 {
   0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a,
   0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0,
diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl
index 4da62a07d..fbd378c0b 100644
--- a/OpenCL/m10400_a0-optimized.cl
+++ b/OpenCL/m10400_a0-optimized.cl
@@ -17,7 +17,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl
index f1662f380..5ad30d180 100644
--- a/OpenCL/m10400_a1-optimized.cl
+++ b/OpenCL/m10400_a1-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl
index 1182a6067..5f134e4d5 100644
--- a/OpenCL/m10400_a3-optimized.cl
+++ b/OpenCL/m10400_a3-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl
index 5bbc1c9d7..ede8b87ea 100644
--- a/OpenCL/m10410_a0-optimized.cl
+++ b/OpenCL/m10410_a0-optimized.cl
@@ -17,7 +17,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl
index b480d6321..493638f0b 100644
--- a/OpenCL/m10410_a1-optimized.cl
+++ b/OpenCL/m10410_a1-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl
index 6bd44c809..0513d4053 100644
--- a/OpenCL/m10410_a3-optimized.cl
+++ b/OpenCL/m10410_a3-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10420_a0-optimized.cl b/OpenCL/m10420_a0-optimized.cl
index 2766e028a..799be19d9 100644
--- a/OpenCL/m10420_a0-optimized.cl
+++ b/OpenCL/m10420_a0-optimized.cl
@@ -16,7 +16,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10420_a1-optimized.cl b/OpenCL/m10420_a1-optimized.cl
index 3fe2fba4e..9f9f26a7f 100644
--- a/OpenCL/m10420_a1-optimized.cl
+++ b/OpenCL/m10420_a1-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10420_a3-optimized.cl b/OpenCL/m10420_a3-optimized.cl
index 47d0f1252..7162048c0 100644
--- a/OpenCL/m10420_a3-optimized.cl
+++ b/OpenCL/m10420_a3-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_hash_md5.cl"
 #endif
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl
index ef270689a..844382357 100644
--- a/OpenCL/m10500-pure.cl
+++ b/OpenCL/m10500-pure.cl
@@ -14,7 +14,7 @@
 #define COMPARE_S "inc_comp_single.cl"
 #define COMPARE_M "inc_comp_multi.cl"
 
-CONSTANT_AS u32a padding[8] =
+CONSTANT_VK u32a padding[8] =
 {
   0x5e4ebf28,
   0x418a754e,
diff --git a/OpenCL/m11500_a0-optimized.cl b/OpenCL/m11500_a0-optimized.cl
index 903190d74..9a77848e9 100644
--- a/OpenCL/m11500_a0-optimized.cl
+++ b/OpenCL/m11500_a0-optimized.cl
@@ -16,7 +16,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a crc32tab[0x100] =
+CONSTANT_VK u32a crc32tab[0x100] =
 {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
diff --git a/OpenCL/m11500_a1-optimized.cl b/OpenCL/m11500_a1-optimized.cl
index 6ee1b5b6c..06a8b1ebb 100644
--- a/OpenCL/m11500_a1-optimized.cl
+++ b/OpenCL/m11500_a1-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a crc32tab[0x100] =
+CONSTANT_VK u32a crc32tab[0x100] =
 {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl
index 2fd20e7bf..698b93325 100644
--- a/OpenCL/m11500_a3-optimized.cl
+++ b/OpenCL/m11500_a3-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u32a crc32tab[0x100] =
+CONSTANT_VK u32a crc32tab[0x100] =
 {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl
index 4c3cf8fe7..43002fca5 100644
--- a/OpenCL/m12400-pure.cl
+++ b/OpenCL/m12400-pure.cl
@@ -60,7 +60,7 @@ typedef struct bsdicrypt_tmp
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -208,7 +208,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl
index 503e26d28..5a8a3c39c 100644
--- a/OpenCL/m14000_a0-pure.cl
+++ b/OpenCL/m14000_a0-pure.cl
@@ -53,7 +53,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     /* nibble 0 */
@@ -209,7 +209,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl
index 90954adae..f8f55cf2e 100644
--- a/OpenCL/m14000_a1-pure.cl
+++ b/OpenCL/m14000_a1-pure.cl
@@ -51,7 +51,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -199,7 +199,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl
index 17d5bed9d..42b06753b 100644
--- a/OpenCL/m14100_a0-pure.cl
+++ b/OpenCL/m14100_a0-pure.cl
@@ -53,7 +53,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     /* nibble 0 */
@@ -209,7 +209,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl
index 3eba5678c..f44756c2b 100644
--- a/OpenCL/m14100_a1-pure.cl
+++ b/OpenCL/m14100_a1-pure.cl
@@ -51,7 +51,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -199,7 +199,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl
index 616d944d3..854e1c36b 100644
--- a/OpenCL/m14100_a3-pure.cl
+++ b/OpenCL/m14100_a3-pure.cl
@@ -51,7 +51,7 @@
   PERM_OP (l, r, tt,  4, 0x0f0f0f0f);  \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x02080800, 0x00080000, 0x02000002, 0x02080802,
@@ -199,7 +199,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   }
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl
index f8946baa1..16cf26446 100644
--- a/OpenCL/m14900_a0-optimized.cl
+++ b/OpenCL/m14900_a0-optimized.cl
@@ -16,7 +16,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u8a c_ftable[256] =
+CONSTANT_VK u8a c_ftable[256] =
 {
   0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
   0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl
index f672860fe..b9753524c 100644
--- a/OpenCL/m14900_a1-optimized.cl
+++ b/OpenCL/m14900_a1-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u8a c_ftable[256] =
+CONSTANT_VK u8a c_ftable[256] =
 {
   0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
   0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl
index 7255cfcec..a64f22df7 100644
--- a/OpenCL/m14900_a3-optimized.cl
+++ b/OpenCL/m14900_a3-optimized.cl
@@ -14,7 +14,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u8a c_ftable[256] =
+CONSTANT_VK u8a c_ftable[256] =
 {
   0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
   0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
diff --git a/OpenCL/m15600-pure.cl b/OpenCL/m15600-pure.cl
index 05652abf7..e6bad07b5 100644
--- a/OpenCL/m15600-pure.cl
+++ b/OpenCL/m15600-pure.cl
@@ -34,7 +34,7 @@ typedef struct ethereum_pbkdf2
 #define COMPARE_S "inc_comp_single.cl"
 #define COMPARE_M "inc_comp_multi.cl"
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl
index 15bc64b58..3e12e7100 100644
--- a/OpenCL/m15700-pure.cl
+++ b/OpenCL/m15700-pure.cl
@@ -261,7 +261,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
   st[4 + s] ^= ~bc0 & bc1;      \
 }
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl
index 991e033a0..c78cbff01 100644
--- a/OpenCL/m16000_a0-pure.cl
+++ b/OpenCL/m16000_a0-pure.cl
@@ -35,7 +35,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -183,7 +183,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
@@ -331,7 +331,7 @@ CONSTANT_AS u32a c_skb[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_tripcode_salt[128] =
+CONSTANT_VK u32a c_tripcode_salt[128] =
 {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl
index 994e30ace..578021fe8 100644
--- a/OpenCL/m16000_a1-pure.cl
+++ b/OpenCL/m16000_a1-pure.cl
@@ -33,7 +33,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -181,7 +181,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
@@ -329,7 +329,7 @@ CONSTANT_AS u32a c_skb[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_tripcode_salt[128] =
+CONSTANT_VK u32a c_tripcode_salt[128] =
 {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl
index 17361374f..12e73ed34 100644
--- a/OpenCL/m16000_a3-pure.cl
+++ b/OpenCL/m16000_a3-pure.cl
@@ -33,7 +33,7 @@
   a  = a ^ tt;              \
 }
 
-CONSTANT_AS u32a c_SPtrans[8][64] =
+CONSTANT_VK u32a c_SPtrans[8][64] =
 {
   {
     0x00820200, 0x00020000, 0x80800000, 0x80820200,
@@ -181,7 +181,7 @@ CONSTANT_AS u32a c_SPtrans[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_skb[8][64] =
+CONSTANT_VK u32a c_skb[8][64] =
 {
   {
     0x00000000, 0x00000010, 0x20000000, 0x20000010,
@@ -329,7 +329,7 @@ CONSTANT_AS u32a c_skb[8][64] =
   },
 };
 
-CONSTANT_AS u32a c_tripcode_salt[128] =
+CONSTANT_VK u32a c_tripcode_salt[128] =
 {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl
index fa3d51042..5201a8ccc 100644
--- a/OpenCL/m16300-pure.cl
+++ b/OpenCL/m16300-pure.cl
@@ -36,7 +36,7 @@ typedef struct pbkdf2_sha256_tmp
 
 } pbkdf2_sha256_tmp_t;
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17300_a0-optimized.cl b/OpenCL/m17300_a0-optimized.cl
index 011de4eb5..a1c7bd237 100644
--- a/OpenCL/m17300_a0-optimized.cl
+++ b/OpenCL/m17300_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17300_a1-optimized.cl b/OpenCL/m17300_a1-optimized.cl
index d110afb2d..cc3c5ac5b 100644
--- a/OpenCL/m17300_a1-optimized.cl
+++ b/OpenCL/m17300_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17300_a3-optimized.cl b/OpenCL/m17300_a3-optimized.cl
index 58898f5cb..cf78c55e6 100644
--- a/OpenCL/m17300_a3-optimized.cl
+++ b/OpenCL/m17300_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17400_a0-optimized.cl b/OpenCL/m17400_a0-optimized.cl
index a157070f0..8dbefbd72 100644
--- a/OpenCL/m17400_a0-optimized.cl
+++ b/OpenCL/m17400_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17400_a1-optimized.cl b/OpenCL/m17400_a1-optimized.cl
index 84b14a2b8..96c99c25b 100644
--- a/OpenCL/m17400_a1-optimized.cl
+++ b/OpenCL/m17400_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17400_a3-optimized.cl b/OpenCL/m17400_a3-optimized.cl
index fd2977b51..1ce91475c 100644
--- a/OpenCL/m17400_a3-optimized.cl
+++ b/OpenCL/m17400_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17500_a0-optimized.cl b/OpenCL/m17500_a0-optimized.cl
index dd62c6956..10a029a81 100644
--- a/OpenCL/m17500_a0-optimized.cl
+++ b/OpenCL/m17500_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17500_a1-optimized.cl b/OpenCL/m17500_a1-optimized.cl
index e6a04db79..3a62ba394 100644
--- a/OpenCL/m17500_a1-optimized.cl
+++ b/OpenCL/m17500_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17500_a3-optimized.cl b/OpenCL/m17500_a3-optimized.cl
index 9c6a3c99b..91705b5c4 100644
--- a/OpenCL/m17500_a3-optimized.cl
+++ b/OpenCL/m17500_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17600_a0-optimized.cl b/OpenCL/m17600_a0-optimized.cl
index bf9c3efb6..0b32f69b5 100644
--- a/OpenCL/m17600_a0-optimized.cl
+++ b/OpenCL/m17600_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17600_a1-optimized.cl b/OpenCL/m17600_a1-optimized.cl
index a605df2a8..5d5117736 100644
--- a/OpenCL/m17600_a1-optimized.cl
+++ b/OpenCL/m17600_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17600_a3-optimized.cl b/OpenCL/m17600_a3-optimized.cl
index aed11f579..7dde3b61a 100644
--- a/OpenCL/m17600_a3-optimized.cl
+++ b/OpenCL/m17600_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17700_a0-optimized.cl b/OpenCL/m17700_a0-optimized.cl
index 3bc5f965f..00355e29a 100644
--- a/OpenCL/m17700_a0-optimized.cl
+++ b/OpenCL/m17700_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17700_a1-optimized.cl b/OpenCL/m17700_a1-optimized.cl
index 5dcf2aff7..88527e9b6 100644
--- a/OpenCL/m17700_a1-optimized.cl
+++ b/OpenCL/m17700_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17700_a3-optimized.cl b/OpenCL/m17700_a3-optimized.cl
index bc912a045..70a3f42d0 100644
--- a/OpenCL/m17700_a3-optimized.cl
+++ b/OpenCL/m17700_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17800_a0-optimized.cl b/OpenCL/m17800_a0-optimized.cl
index dc4cffadd..193f9ce6e 100644
--- a/OpenCL/m17800_a0-optimized.cl
+++ b/OpenCL/m17800_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17800_a1-optimized.cl b/OpenCL/m17800_a1-optimized.cl
index 8733184de..cc1e2ca52 100644
--- a/OpenCL/m17800_a1-optimized.cl
+++ b/OpenCL/m17800_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17800_a3-optimized.cl b/OpenCL/m17800_a3-optimized.cl
index f2d497e36..3de33b43a 100644
--- a/OpenCL/m17800_a3-optimized.cl
+++ b/OpenCL/m17800_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17900_a0-optimized.cl b/OpenCL/m17900_a0-optimized.cl
index 5ed128657..1e9e68e9d 100644
--- a/OpenCL/m17900_a0-optimized.cl
+++ b/OpenCL/m17900_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17900_a1-optimized.cl b/OpenCL/m17900_a1-optimized.cl
index d6518aeb8..e26e19987 100644
--- a/OpenCL/m17900_a1-optimized.cl
+++ b/OpenCL/m17900_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m17900_a3-optimized.cl b/OpenCL/m17900_a3-optimized.cl
index 43d2791d9..db78824ad 100644
--- a/OpenCL/m17900_a3-optimized.cl
+++ b/OpenCL/m17900_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m18000_a0-optimized.cl b/OpenCL/m18000_a0-optimized.cl
index 1d587b1db..6a2f4eeb4 100644
--- a/OpenCL/m18000_a0-optimized.cl
+++ b/OpenCL/m18000_a0-optimized.cl
@@ -15,7 +15,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m18000_a1-optimized.cl b/OpenCL/m18000_a1-optimized.cl
index 1964d037a..36caf6052 100644
--- a/OpenCL/m18000_a1-optimized.cl
+++ b/OpenCL/m18000_a1-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m18000_a3-optimized.cl b/OpenCL/m18000_a3-optimized.cl
index a30de8b7e..2dee6304a 100644
--- a/OpenCL/m18000_a3-optimized.cl
+++ b/OpenCL/m18000_a3-optimized.cl
@@ -13,7 +13,7 @@
 #include "inc_simd.cl"
 #endif
 
-CONSTANT_AS u64a keccakf_rndc[24] =
+CONSTANT_VK u64a keccakf_rndc[24] =
 {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index f68d6f82f..2f8e0ea18 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -38,7 +38,7 @@ typedef struct odf11
 
 // http://www.schneier.com/code/constants.txt
 
-CONSTANT_AS u32a c_sbox0[256] =
+CONSTANT_VK u32a c_sbox0[256] =
 {
   0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
   0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
@@ -106,7 +106,7 @@ CONSTANT_AS u32a c_sbox0[256] =
   0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a
 };
 
-CONSTANT_AS u32a c_sbox1[256] =
+CONSTANT_VK u32a c_sbox1[256] =
 {
   0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
   0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
@@ -174,7 +174,7 @@ CONSTANT_AS u32a c_sbox1[256] =
   0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7
 };
 
-CONSTANT_AS u32a c_sbox2[256] =
+CONSTANT_VK u32a c_sbox2[256] =
 {
   0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
   0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
@@ -242,7 +242,7 @@ CONSTANT_AS u32a c_sbox2[256] =
   0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0
 };
 
-CONSTANT_AS u32a c_sbox3[256] =
+CONSTANT_VK u32a c_sbox3[256] =
 {
   0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
   0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
@@ -310,7 +310,7 @@ CONSTANT_AS u32a c_sbox3[256] =
   0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
 };
 
-CONSTANT_AS u32a c_pbox[18] =
+CONSTANT_VK u32a c_pbox[18] =
 {
   0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
   0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
diff --git a/src/backend.c b/src/backend.c
index 9a162c150..eee101e6c 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3166,14 +3166,14 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       if (rc_cuEventRecord2 == -1) return -1;
     }
 
-    const int rc_cuEventSynchronize = hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event2);
-
-    if (rc_cuEventSynchronize == -1) return -1;
-
     const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
 
     if (rc_cuStreamSynchronize == -1) return -1;
 
+    const int rc_cuEventSynchronize = hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event2);
+
+    if (rc_cuEventSynchronize == -1) return -1;
+
     float exec_ms;
 
     const int rc_cuEventElapsedTime = hc_cuEventElapsedTime (hashcat_ctx, &exec_ms, device_param->cuda_event1, device_param->cuda_event2);

From bbed0cd67ad21f82d590c27dd096f7170e4a3556 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 6 May 2019 15:06:02 +0200
Subject: [PATCH 33/73] Fix test.sh and bitsliced algos

---
 OpenCL/m01500_a3-pure.cl |  8 ++++++++
 OpenCL/m03000_a3-pure.cl |  8 ++++++++
 OpenCL/m14000_a3-pure.cl |  8 ++++++++
 src/backend.c            | 17 +++++++++++++----
 tools/test.sh            |  6 +++---
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index 28c9f2573..5c534cd4e 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -1998,7 +1998,11 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2446,7 +2450,11 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl
index 8af1fad35..8f1449780 100644
--- a/OpenCL/m03000_a3-pure.cl
+++ b/OpenCL/m03000_a3-pure.cl
@@ -1830,7 +1830,11 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2278,7 +2282,11 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl
index 6cfaee1db..cc98a5dce 100644
--- a/OpenCL/m14000_a3-pure.cl
+++ b/OpenCL/m14000_a3-pure.cl
@@ -2043,7 +2043,11 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
@@ -2555,7 +2559,11 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
    * inner loop
    */
 
+  #ifdef IS_CUDA
+  const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
+  #else
   const u32 pc_pos = get_global_id (1);
+  #endif
 
   const u32 il_pos = pc_pos * 32;
 
diff --git a/src/backend.c b/src/backend.c
index eee101e6c..000e826b1 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3119,7 +3119,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
       if (rc_cuEventRecord1 == -1) return -1;
 
-      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / 32, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
 
       if (rc_cuLaunchKernel == -1) return -1;
 
@@ -3525,7 +3525,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   {
     CUfunction cuda_function = device_param->cuda_function_tm;
 
-    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_tm, NULL);
+    const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_tm, NULL);
 
     if (rc_cuLaunchKernel == -1) return -1;
 
@@ -8526,8 +8526,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5];
       device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf64[6];
 
-      device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
-      device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c;
+      if (device_param->is_cuda == true)
+      {
+        device_param->kernel_params_tm[0] = &device_param->cuda_d_bfs_c;
+        device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c;
+      }
+
+      if (device_param->is_opencl == true)
+      {
+        device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c;
+        device_param->kernel_params_tm[1] = &device_param->opencl_d_tm_c;
+      }
     }
 
     device_param->kernel_params_memset_buf32[1] = 0; // value
diff --git a/tools/test.sh b/tools/test.sh
index 6f8b28d5b..d0546a219 100755
--- a/tools/test.sh
+++ b/tools/test.sh
@@ -2477,7 +2477,7 @@ cat << EOF
 
 OPTIONS:
 
-  -V    OpenCL vector-width (either 1, 2, 4 or 8), overrides value from device query :
+  -V    Backend vector-width (either 1, 2, 4 or 8), overrides value from device query :
         '1'         => vector-width 1
         '2'         => vector-width 2 (default)
         '4'         => vector-width 4
@@ -2507,7 +2507,7 @@ OPTIONS:
         'linux'     => Linux operating system (use .bin file extension)
         'macos'     => macOS operating system (use .app file extension)
 
-  -d    Select the OpenCL device :
+  -d    Select the Backend device :
         (int)[,int] => comma separated list of devices (default : 1)
 
   -D    Select the OpenCL device types :
@@ -2866,7 +2866,7 @@ if [ "${PACKAGE}" -eq 0 -o -z "${PACKAGE_FOLDER}" ]; then
           fi
 
           VECTOR=${CUR_WIDTH}
-          OPTS="${OPTS_OLD} --opencl-vector-width ${VECTOR}"
+          OPTS="${OPTS_OLD} --backend-vector-width ${VECTOR}"
 
           if [[ ${IS_SLOW} -eq 1 ]]; then
 

From 8ff8c5d536f36b8fafc8cb65bf821c9857e4910f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 09:01:32 +0200
Subject: [PATCH 34/73] Add LOCAL_VK to make use of __shared__

---
 OpenCL/inc_rp.cl              |   2 +-
 OpenCL/inc_vendor.h           |   3 +
 OpenCL/m01100_a0-optimized.cl |   4 +-
 OpenCL/m01100_a1-optimized.cl |   4 +-
 OpenCL/m01100_a3-optimized.cl |  12 ++--
 OpenCL/m01500_a0-pure.cl      |   8 +--
 OpenCL/m01500_a1-pure.cl      |   8 +--
 OpenCL/m02500-pure.cl         |  10 +--
 OpenCL/m02501-pure.cl         |  10 +--
 OpenCL/m02610_a0-optimized.cl |   4 +-
 OpenCL/m02610_a0-pure.cl      |   4 +-
 OpenCL/m02610_a1-optimized.cl |   4 +-
 OpenCL/m02610_a1-pure.cl      |   4 +-
 OpenCL/m02610_a3-optimized.cl |  12 ++--
 OpenCL/m02610_a3-pure.cl      |   4 +-
 OpenCL/m02710_a0-optimized.cl |   4 +-
 OpenCL/m02710_a1-optimized.cl |   4 +-
 OpenCL/m02710_a3-optimized.cl |  12 ++--
 OpenCL/m02810_a0-optimized.cl |   4 +-
 OpenCL/m02810_a0-pure.cl      |   4 +-
 OpenCL/m02810_a1-optimized.cl |   4 +-
 OpenCL/m02810_a1-pure.cl      |   4 +-
 OpenCL/m02810_a3-optimized.cl |  12 ++--
 OpenCL/m02810_a3-pure.cl      |   4 +-
 OpenCL/m03000_a0-pure.cl      |   8 +--
 OpenCL/m03000_a1-pure.cl      |   8 +--
 OpenCL/m03100_a0-optimized.cl |   8 +--
 OpenCL/m03100_a1-optimized.cl |   8 +--
 OpenCL/m03100_a3-optimized.cl |  16 ++---
 OpenCL/m03200-pure.cl         |  48 +++++++-------
 OpenCL/m03710_a0-optimized.cl |   4 +-
 OpenCL/m03710_a0-pure.cl      |   4 +-
 OpenCL/m03710_a1-optimized.cl |   4 +-
 OpenCL/m03710_a1-pure.cl      |   4 +-
 OpenCL/m03710_a3-optimized.cl |  12 ++--
 OpenCL/m03710_a3-pure.cl      |   4 +-
 OpenCL/m03910_a0-optimized.cl |   4 +-
 OpenCL/m03910_a0-pure.cl      |   4 +-
 OpenCL/m03910_a1-optimized.cl |   4 +-
 OpenCL/m03910_a1-pure.cl      |   4 +-
 OpenCL/m03910_a3-optimized.cl |  12 ++--
 OpenCL/m03910_a3-pure.cl      |   4 +-
 OpenCL/m04010_a0-optimized.cl |   4 +-
 OpenCL/m04010_a0-pure.cl      |   4 +-
 OpenCL/m04010_a1-optimized.cl |   4 +-
 OpenCL/m04010_a1-pure.cl      |   4 +-
 OpenCL/m04010_a3-optimized.cl |  12 ++--
 OpenCL/m04010_a3-pure.cl      |   4 +-
 OpenCL/m04110_a0-optimized.cl |   4 +-
 OpenCL/m04110_a0-pure.cl      |   4 +-
 OpenCL/m04110_a1-optimized.cl |   4 +-
 OpenCL/m04110_a1-pure.cl      |   4 +-
 OpenCL/m04110_a3-optimized.cl |  12 ++--
 OpenCL/m04110_a3-pure.cl      |   4 +-
 OpenCL/m04310_a0-optimized.cl |   4 +-
 OpenCL/m04310_a0-pure.cl      |   4 +-
 OpenCL/m04310_a1-optimized.cl |   4 +-
 OpenCL/m04310_a1-pure.cl      |   4 +-
 OpenCL/m04310_a3-optimized.cl |  12 ++--
 OpenCL/m04310_a3-pure.cl      |   4 +-
 OpenCL/m04400_a0-optimized.cl |   4 +-
 OpenCL/m04400_a0-pure.cl      |   4 +-
 OpenCL/m04400_a1-optimized.cl |   4 +-
 OpenCL/m04400_a1-pure.cl      |   4 +-
 OpenCL/m04400_a3-optimized.cl |  12 ++--
 OpenCL/m04400_a3-pure.cl      |   4 +-
 OpenCL/m04500_a0-optimized.cl |   4 +-
 OpenCL/m04500_a0-pure.cl      |   4 +-
 OpenCL/m04500_a1-optimized.cl |   4 +-
 OpenCL/m04500_a1-pure.cl      |   4 +-
 OpenCL/m04500_a3-optimized.cl |  12 ++--
 OpenCL/m04500_a3-pure.cl      |   4 +-
 OpenCL/m04520_a0-optimized.cl |   4 +-
 OpenCL/m04520_a0-pure.cl      |   4 +-
 OpenCL/m04520_a1-optimized.cl |   4 +-
 OpenCL/m04520_a1-pure.cl      |   4 +-
 OpenCL/m04520_a3-optimized.cl |  12 ++--
 OpenCL/m04520_a3-pure.cl      |   4 +-
 OpenCL/m04700_a0-optimized.cl |   4 +-
 OpenCL/m04700_a0-pure.cl      |   4 +-
 OpenCL/m04700_a1-optimized.cl |   4 +-
 OpenCL/m04700_a1-pure.cl      |   4 +-
 OpenCL/m04700_a3-optimized.cl |  12 ++--
 OpenCL/m04700_a3-pure.cl      |   4 +-
 OpenCL/m05300_a0-optimized.cl |   8 +--
 OpenCL/m05300_a1-optimized.cl |   8 +--
 OpenCL/m05300_a3-optimized.cl |  24 +++----
 OpenCL/m05400_a0-optimized.cl |   8 +--
 OpenCL/m05400_a1-optimized.cl |   8 +--
 OpenCL/m05400_a3-optimized.cl |  24 +++----
 OpenCL/m05500_a0-optimized.cl |   8 +--
 OpenCL/m05500_a0-pure.cl      |   8 +--
 OpenCL/m05500_a1-optimized.cl |   8 +--
 OpenCL/m05500_a1-pure.cl      |   8 +--
 OpenCL/m05500_a3-optimized.cl |  24 +++----
 OpenCL/m05500_a3-pure.cl      |   8 +--
 OpenCL/m05600_a0-optimized.cl |   8 +--
 OpenCL/m05600_a1-optimized.cl |   8 +--
 OpenCL/m05600_a3-optimized.cl |  24 +++----
 OpenCL/m05800-optimized.cl    |   4 +-
 OpenCL/m05800-pure.cl         |   4 +-
 OpenCL/m06100_a0-optimized.cl |   8 +--
 OpenCL/m06100_a0-pure.cl      |   8 +--
 OpenCL/m06100_a1-optimized.cl |   8 +--
 OpenCL/m06100_a1-pure.cl      |   8 +--
 OpenCL/m06100_a3-optimized.cl |  16 ++---
 OpenCL/m06100_a3-pure.cl      |   8 +--
 OpenCL/m06211-pure.cl         |  22 +++----
 OpenCL/m06212-pure.cl         |  22 +++----
 OpenCL/m06213-pure.cl         |  22 +++----
 OpenCL/m06221-pure.cl         |  22 +++----
 OpenCL/m06222-pure.cl         |  22 +++----
 OpenCL/m06223-pure.cl         |  22 +++----
 OpenCL/m06231-pure.cl         |  34 +++++-----
 OpenCL/m06232-pure.cl         |  34 +++++-----
 OpenCL/m06233-pure.cl         |  34 +++++-----
 OpenCL/m06600-pure.cl         |  20 +++---
 OpenCL/m06800-pure.cl         |  20 +++---
 OpenCL/m06900_a0-optimized.cl |   4 +-
 OpenCL/m06900_a1-optimized.cl |   4 +-
 OpenCL/m06900_a3-optimized.cl |   8 +--
 OpenCL/m07500_a0-optimized.cl |  10 +--
 OpenCL/m07500_a0-pure.cl      |  10 +--
 OpenCL/m07500_a1-optimized.cl |  10 +--
 OpenCL/m07500_a1-pure.cl      |  10 +--
 OpenCL/m07500_a3-optimized.cl |  18 ++---
 OpenCL/m07500_a3-pure.cl      |  10 +--
 OpenCL/m08000_a0-optimized.cl |   8 +--
 OpenCL/m08000_a1-optimized.cl |   8 +--
 OpenCL/m08000_a3-optimized.cl |  24 +++----
 OpenCL/m08400_a0-optimized.cl |   4 +-
 OpenCL/m08400_a0-pure.cl      |   4 +-
 OpenCL/m08400_a1-optimized.cl |   4 +-
 OpenCL/m08400_a1-pure.cl      |   4 +-
 OpenCL/m08400_a3-optimized.cl |  12 ++--
 OpenCL/m08400_a3-pure.cl      |   4 +-
 OpenCL/m08500_a0-pure.cl      |   8 +--
 OpenCL/m08500_a1-pure.cl      |   8 +--
 OpenCL/m08500_a3-pure.cl      |   8 +--
 OpenCL/m08600_a0-pure.cl      |   4 +-
 OpenCL/m08600_a1-pure.cl      |   4 +-
 OpenCL/m08600_a3-pure.cl      |   4 +-
 OpenCL/m08700_a0-optimized.cl |   8 +--
 OpenCL/m08700_a1-optimized.cl |   8 +--
 OpenCL/m08700_a3-optimized.cl |  24 +++----
 OpenCL/m08800-pure.cl         |  20 +++---
 OpenCL/m09000-pure.cl         |  32 ++++-----
 OpenCL/m09100-pure.cl         |   4 +-
 OpenCL/m09400-pure.cl         |  20 +++---
 OpenCL/m09500-pure.cl         |  20 +++---
 OpenCL/m09600-pure.cl         |  20 +++---
 OpenCL/m09700_a0-optimized.cl |  10 +--
 OpenCL/m09700_a1-optimized.cl |  10 +--
 OpenCL/m09700_a3-optimized.cl |  18 ++---
 OpenCL/m09710_a0-optimized.cl |  10 +--
 OpenCL/m09710_a1-optimized.cl |  10 +--
 OpenCL/m09710_a3-optimized.cl |  10 +--
 OpenCL/m09800_a0-optimized.cl |  10 +--
 OpenCL/m09800_a1-optimized.cl |  10 +--
 OpenCL/m09800_a3-optimized.cl |  18 ++---
 OpenCL/m09810_a0-optimized.cl |  10 +--
 OpenCL/m09810_a1-optimized.cl |  10 +--
 OpenCL/m09810_a3-optimized.cl |  18 ++---
 OpenCL/m10400_a0-optimized.cl |  10 +--
 OpenCL/m10400_a1-optimized.cl |  10 +--
 OpenCL/m10400_a3-optimized.cl |  18 ++---
 OpenCL/m10410_a0-optimized.cl |  10 +--
 OpenCL/m10410_a1-optimized.cl |  10 +--
 OpenCL/m10410_a3-optimized.cl |  18 ++---
 OpenCL/m10500-pure.cl         |   6 +-
 OpenCL/m10700-optimized.cl    |  10 +--
 OpenCL/m10700-pure.cl         |  10 +--
 OpenCL/m11100_a0-optimized.cl |   4 +-
 OpenCL/m11100_a0-pure.cl      |   4 +-
 OpenCL/m11100_a1-optimized.cl |   4 +-
 OpenCL/m11100_a1-pure.cl      |   4 +-
 OpenCL/m11100_a3-optimized.cl |  12 ++--
 OpenCL/m11100_a3-pure.cl      |   4 +-
 OpenCL/m11300-pure.cl         |  20 +++---
 OpenCL/m11400_a0-pure.cl      |   4 +-
 OpenCL/m11400_a1-pure.cl      |   4 +-
 OpenCL/m11400_a3-pure.cl      |   4 +-
 OpenCL/m11700_a0-optimized.cl |   4 +-
 OpenCL/m11700_a0-pure.cl      |   4 +-
 OpenCL/m11700_a1-optimized.cl |   4 +-
 OpenCL/m11700_a1-pure.cl      |   4 +-
 OpenCL/m11700_a3-optimized.cl |  12 ++--
 OpenCL/m11700_a3-pure.cl      |   4 +-
 OpenCL/m11750_a0-pure.cl      |   4 +-
 OpenCL/m11750_a1-pure.cl      |   4 +-
 OpenCL/m11750_a3-pure.cl      |   4 +-
 OpenCL/m11760_a0-pure.cl      |   4 +-
 OpenCL/m11760_a1-pure.cl      |   4 +-
 OpenCL/m11760_a3-pure.cl      |   4 +-
 OpenCL/m11800_a0-optimized.cl |   4 +-
 OpenCL/m11800_a0-pure.cl      |   4 +-
 OpenCL/m11800_a1-optimized.cl |   4 +-
 OpenCL/m11800_a1-pure.cl      |   4 +-
 OpenCL/m11800_a3-optimized.cl |  12 ++--
 OpenCL/m11800_a3-pure.cl      |   4 +-
 OpenCL/m11850_a0-pure.cl      |   4 +-
 OpenCL/m11850_a1-pure.cl      |   4 +-
 OpenCL/m11850_a3-pure.cl      |   4 +-
 OpenCL/m11860_a0-pure.cl      |   4 +-
 OpenCL/m11860_a1-pure.cl      |   4 +-
 OpenCL/m11860_a3-pure.cl      |   4 +-
 OpenCL/m12400-pure.cl         |   8 +--
 OpenCL/m12500-pure.cl         |  20 +++---
 OpenCL/m12600_a0-optimized.cl |   4 +-
 OpenCL/m12600_a0-pure.cl      |   4 +-
 OpenCL/m12600_a1-optimized.cl |   4 +-
 OpenCL/m12600_a1-pure.cl      |   4 +-
 OpenCL/m12600_a3-optimized.cl |  12 ++--
 OpenCL/m12600_a3-pure.cl      |   4 +-
 OpenCL/m12700-pure.cl         |  20 +++---
 OpenCL/m12800-pure.cl         |   2 +-
 OpenCL/m13100_a0-optimized.cl |  10 +--
 OpenCL/m13100_a0-pure.cl      |  10 +--
 OpenCL/m13100_a1-optimized.cl |  10 +--
 OpenCL/m13100_a1-pure.cl      |  10 +--
 OpenCL/m13100_a3-optimized.cl |  18 ++---
 OpenCL/m13100_a3-pure.cl      |  10 +--
 OpenCL/m13200-pure.cl         |  20 +++---
 OpenCL/m13400-pure.cl         |  30 ++++-----
 OpenCL/m13711-pure.cl         |  42 ++++++------
 OpenCL/m13712-pure.cl         |  42 ++++++------
 OpenCL/m13713-pure.cl         |  42 ++++++------
 OpenCL/m13721-pure.cl         |  42 ++++++------
 OpenCL/m13722-pure.cl         |  42 ++++++------
 OpenCL/m13723-pure.cl         |  42 ++++++------
 OpenCL/m13731-pure.cl         |  54 +++++++--------
 OpenCL/m13732-pure.cl         |  54 +++++++--------
 OpenCL/m13733-pure.cl         |  54 +++++++--------
 OpenCL/m13751-pure.cl         |  42 ++++++------
 OpenCL/m13752-pure.cl         |  42 ++++++------
 OpenCL/m13753-pure.cl         |  42 ++++++------
 OpenCL/m13771-pure.cl         |  46 ++++++-------
 OpenCL/m13772-pure.cl         |  46 ++++++-------
 OpenCL/m13773-pure.cl         |  46 ++++++-------
 OpenCL/m13800_a0-optimized.cl |   4 +-
 OpenCL/m13800_a1-optimized.cl |   4 +-
 OpenCL/m13800_a3-optimized.cl |  12 ++--
 OpenCL/m13900_a0-optimized.cl |   4 +-
 OpenCL/m13900_a0-pure.cl      |   4 +-
 OpenCL/m13900_a1-optimized.cl |   4 +-
 OpenCL/m13900_a1-pure.cl      |   4 +-
 OpenCL/m13900_a3-optimized.cl |  12 ++--
 OpenCL/m13900_a3-pure.cl      |   4 +-
 OpenCL/m14000_a0-pure.cl      |   8 +--
 OpenCL/m14000_a1-pure.cl      |   8 +--
 OpenCL/m14100_a0-pure.cl      |   8 +--
 OpenCL/m14100_a1-pure.cl      |   8 +--
 OpenCL/m14100_a3-pure.cl      |   8 +--
 OpenCL/m14400_a0-optimized.cl |   4 +-
 OpenCL/m14400_a0-pure.cl      |   4 +-
 OpenCL/m14400_a1-optimized.cl |   4 +-
 OpenCL/m14400_a1-pure.cl      |   4 +-
 OpenCL/m14400_a3-optimized.cl |  12 ++--
 OpenCL/m14400_a3-pure.cl      |   4 +-
 OpenCL/m14611-pure.cl         |  20 +++---
 OpenCL/m14621-pure.cl         |  20 +++---
 OpenCL/m14631-pure.cl         |  20 +++---
 OpenCL/m14641-pure.cl         |  20 +++---
 OpenCL/m14700-pure.cl         |  20 +++---
 OpenCL/m14800-pure.cl         |  20 +++---
 OpenCL/m14900_a0-optimized.cl |   4 +-
 OpenCL/m14900_a1-optimized.cl |   4 +-
 OpenCL/m14900_a3-optimized.cl |   4 +-
 OpenCL/m15300-pure.cl         |   4 +-
 OpenCL/m15900-pure.cl         |  20 +++---
 OpenCL/m16000_a0-pure.cl      |  12 ++--
 OpenCL/m16000_a1-pure.cl      |  12 ++--
 OpenCL/m16000_a3-pure.cl      |  12 ++--
 OpenCL/m16200-pure.cl         |  20 +++---
 OpenCL/m16300-pure.cl         |  20 +++---
 OpenCL/m16600_a0-optimized.cl |  40 ++++++------
 OpenCL/m16600_a0-pure.cl      |  40 ++++++------
 OpenCL/m16600_a1-optimized.cl |  40 ++++++------
 OpenCL/m16600_a1-pure.cl      |  40 ++++++------
 OpenCL/m16600_a3-optimized.cl | 120 +++++++++++++++++-----------------
 OpenCL/m16600_a3-pure.cl      |  40 ++++++------
 OpenCL/m18200_a0-optimized.cl |  10 +--
 OpenCL/m18200_a0-pure.cl      |  10 +--
 OpenCL/m18200_a1-optimized.cl |  10 +--
 OpenCL/m18200_a1-pure.cl      |  10 +--
 OpenCL/m18200_a3-optimized.cl |  18 ++---
 OpenCL/m18200_a3-pure.cl      |  10 +--
 OpenCL/m18300-pure.cl         |  20 +++---
 OpenCL/m18400-pure.cl         |  20 +++---
 OpenCL/m18500_a0-pure.cl      |   4 +-
 OpenCL/m18500_a1-pure.cl      |   4 +-
 OpenCL/m18500_a3-pure.cl      |   4 +-
 OpenCL/m18600-pure.cl         |  16 ++---
 OpenCL/m18900-pure.cl         |  20 +++---
 OpenCL/m19500_a0-pure.cl      |   4 +-
 OpenCL/m19500_a1-pure.cl      |   4 +-
 OpenCL/m19500_a3-pure.cl      |   4 +-
 OpenCL/m19600-pure.cl         |  20 +++---
 OpenCL/m19700-pure.cl         |  20 +++---
 OpenCL/m19800-pure.cl         |  20 +++---
 OpenCL/m19900-pure.cl         |  20 +++---
 OpenCL/m20011-pure.cl         |  20 +++---
 OpenCL/m20012-pure.cl         |  20 +++---
 OpenCL/m20013-pure.cl         |  20 +++---
 src/backend.c                 |  27 ++++++++
 src/modules/module_03200.c    |  12 +++-
 306 files changed, 1881 insertions(+), 1841 deletions(-)

diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl
index 5c3905bdd..d67127317 100644
--- a/OpenCL/inc_rp.cl
+++ b/OpenCL/inc_rp.cl
@@ -15,7 +15,7 @@
 
 #ifdef REAL_SHM
 #define COPY_PW(x)                \
-  LOCAL_AS pw_t s_pws[64];         \
+  LOCAL_VK pw_t s_pws[64];         \
   s_pws[get_local_id (0)] = (x);
 #else
 #define COPY_PW(x)                \
diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index 279c87c02..f2f201e19 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -18,18 +18,21 @@
 #define CONSTANT_VK
 #define CONSTANT_AS
 #define GLOBAL_AS
+#define LOCAL_VK
 #define LOCAL_AS
 #define KERNEL_FQ
 #elif defined IS_CUDA
 #define CONSTANT_VK __constant__
 #define CONSTANT_AS
 #define GLOBAL_AS
+#define LOCAL_VK    __shared__
 #define LOCAL_AS
 #define KERNEL_FQ   extern "C" __global__
 #elif defined IS_OPENCL
 #define CONSTANT_VK __constant
 #define CONSTANT_AS __constant
 #define GLOBAL_AS   __global
+#define LOCAL_VK    __local
 #define LOCAL_AS    __local
 #define KERNEL_FQ   __kernel
 #endif
diff --git a/OpenCL/m01100_a0-optimized.cl b/OpenCL/m01100_a0-optimized.cl
index 5963097df..ba7a08b55 100644
--- a/OpenCL/m01100_a0-optimized.cl
+++ b/OpenCL/m01100_a0-optimized.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_RULES ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -262,7 +262,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_RULES ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
diff --git a/OpenCL/m01100_a1-optimized.cl b/OpenCL/m01100_a1-optimized.cl
index 79e758fc5..466e3ff30 100644
--- a/OpenCL/m01100_a1-optimized.cl
+++ b/OpenCL/m01100_a1-optimized.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_BASIC ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -322,7 +322,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_BASIC ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
diff --git a/OpenCL/m01100_a3-optimized.cl b/OpenCL/m01100_a3-optimized.cl
index 9fbbf7952..a7da9f741 100644
--- a/OpenCL/m01100_a3-optimized.cl
+++ b/OpenCL/m01100_a3-optimized.cl
@@ -533,7 +533,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -587,7 +587,7 @@ KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -641,7 +641,7 @@ KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -695,7 +695,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -749,7 +749,7 @@ KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
@@ -803,7 +803,7 @@ KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ())
    * salt
    */
 
-  LOCAL_AS salt_t s_salt_buf[1];
+  LOCAL_VK salt_t s_salt_buf[1];
 
   if (lid == 0)
   {
diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl
index 944f04de0..9614ba7d9 100644
--- a/OpenCL/m01500_a0-pure.cl
+++ b/OpenCL/m01500_a0-pure.cl
@@ -496,8 +496,8 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -580,8 +580,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl
index 3bcf6344f..8f7565fa3 100644
--- a/OpenCL/m01500_a1-pure.cl
+++ b/OpenCL/m01500_a1-pure.cl
@@ -494,8 +494,8 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -657,8 +657,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl
index 13803488b..95f97fb81 100644
--- a/OpenCL/m02500-pure.cl
+++ b/OpenCL/m02500-pure.cl
@@ -666,11 +666,11 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
   const u64 lid = get_local_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl
index 7572cf5d8..43b035e10 100644
--- a/OpenCL/m02501-pure.cl
+++ b/OpenCL/m02501-pure.cl
@@ -536,11 +536,11 @@ KERNEL_FQ void m02501_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t))
   const u64 lid = get_local_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl
index 9d183a451..82e9dcfb4 100644
--- a/OpenCL/m02610_a0-optimized.cl
+++ b/OpenCL/m02610_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -326,7 +326,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl
index 425a8ff3b..6f898154a 100644
--- a/OpenCL/m02610_a0-pure.cl
+++ b/OpenCL/m02610_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -145,7 +145,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl
index 869e9b0ef..c697faa2e 100644
--- a/OpenCL/m02610_a1-optimized.cl
+++ b/OpenCL/m02610_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -383,7 +383,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl
index 406379cce..0db5b5a50 100644
--- a/OpenCL/m02610_a1-pure.cl
+++ b/OpenCL/m02610_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -141,7 +141,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl
index ff92796b0..eea27f0c4 100644
--- a/OpenCL/m02610_a3-optimized.cl
+++ b/OpenCL/m02610_a3-optimized.cl
@@ -606,7 +606,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -676,7 +676,7 @@ KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -746,7 +746,7 @@ KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -816,7 +816,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -886,7 +886,7 @@ KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -956,7 +956,7 @@ KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl
index 5e5aeeefe..7aadbe278 100644
--- a/OpenCL/m02610_a3-pure.cl
+++ b/OpenCL/m02610_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -154,7 +154,7 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl
index 00351a75d..23f08eae5 100644
--- a/OpenCL/m02710_a0-optimized.cl
+++ b/OpenCL/m02710_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -411,7 +411,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl
index 84b8d5bbf..2c1aa24dc 100644
--- a/OpenCL/m02710_a1-optimized.cl
+++ b/OpenCL/m02710_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -468,7 +468,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl
index fd3174776..4de5c2a47 100644
--- a/OpenCL/m02710_a3-optimized.cl
+++ b/OpenCL/m02710_a3-optimized.cl
@@ -775,7 +775,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -845,7 +845,7 @@ KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -915,7 +915,7 @@ KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -985,7 +985,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1055,7 +1055,7 @@ KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1125,7 +1125,7 @@ KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl
index 73ea2bc99..7e35b7ac2 100644
--- a/OpenCL/m02810_a0-optimized.cl
+++ b/OpenCL/m02810_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -410,7 +410,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl
index c4b4e1130..16a4bc77f 100644
--- a/OpenCL/m02810_a0-pure.cl
+++ b/OpenCL/m02810_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -168,7 +168,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl
index 027ad454e..e62cd80e4 100644
--- a/OpenCL/m02810_a1-optimized.cl
+++ b/OpenCL/m02810_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -467,7 +467,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl
index 1d15f049d..330909f26 100644
--- a/OpenCL/m02810_a1-pure.cl
+++ b/OpenCL/m02810_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -164,7 +164,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl
index 7d3067012..25d0c44d0 100644
--- a/OpenCL/m02810_a3-optimized.cl
+++ b/OpenCL/m02810_a3-optimized.cl
@@ -773,7 +773,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -843,7 +843,7 @@ KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -913,7 +913,7 @@ KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -983,7 +983,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1053,7 +1053,7 @@ KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1123,7 +1123,7 @@ KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl
index 63c6e2aef..42fd50213 100644
--- a/OpenCL/m02810_a3-pure.cl
+++ b/OpenCL/m02810_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -177,7 +177,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl
index a2eb1ad60..945043f25 100644
--- a/OpenCL/m03000_a0-pure.cl
+++ b/OpenCL/m03000_a0-pure.cl
@@ -506,8 +506,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -591,8 +591,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl
index 92665dda4..2f4d97572 100644
--- a/OpenCL/m03000_a1-pure.cl
+++ b/OpenCL/m03000_a1-pure.cl
@@ -504,8 +504,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -668,8 +668,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m03100_a0-optimized.cl b/OpenCL/m03100_a0-optimized.cl
index 25104232d..524e37174 100644
--- a/OpenCL/m03100_a0-optimized.cl
+++ b/OpenCL/m03100_a0-optimized.cl
@@ -33,8 +33,8 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -265,8 +265,8 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m03100_a1-optimized.cl b/OpenCL/m03100_a1-optimized.cl
index a4913d622..69d4162bf 100644
--- a/OpenCL/m03100_a1-optimized.cl
+++ b/OpenCL/m03100_a1-optimized.cl
@@ -31,8 +31,8 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -323,8 +323,8 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl
index a6f687f61..b576e67b3 100644
--- a/OpenCL/m03100_a3-optimized.cl
+++ b/OpenCL/m03100_a3-optimized.cl
@@ -429,8 +429,8 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -512,8 +512,8 @@ KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -599,8 +599,8 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -682,8 +682,8 @@ KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl
index 938cb1f48..92c5775ea 100644
--- a/OpenCL/m03200-pure.cl
+++ b/OpenCL/m03200-pure.cl
@@ -444,15 +444,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
    * do the key setup
    */
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   // initstate
 
@@ -613,15 +613,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
     P[i] = tmps[gid].P[i];
   }
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
@@ -798,15 +798,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
     P[i] = tmps[gid].P[i];
   }
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl
index 43c7ef379..2e1a9316f 100644
--- a/OpenCL/m03710_a0-optimized.cl
+++ b/OpenCL/m03710_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -357,7 +357,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl
index 109bb82a4..033e57336 100644
--- a/OpenCL/m03710_a0-pure.cl
+++ b/OpenCL/m03710_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -158,7 +158,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl
index 90b0b86a9..9d3f6c23e 100644
--- a/OpenCL/m03710_a1-optimized.cl
+++ b/OpenCL/m03710_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -414,7 +414,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl
index 15d808240..eb2ed10e8 100644
--- a/OpenCL/m03710_a1-pure.cl
+++ b/OpenCL/m03710_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -154,7 +154,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl
index d5ffd7a6c..f1dbd2d80 100644
--- a/OpenCL/m03710_a3-optimized.cl
+++ b/OpenCL/m03710_a3-optimized.cl
@@ -633,7 +633,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -737,7 +737,7 @@ KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -807,7 +807,7 @@ KERNEL_FQ void m03710_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -877,7 +877,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -947,7 +947,7 @@ KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -983,7 +983,7 @@ KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl
index 56f90d1b9..4311184ce 100644
--- a/OpenCL/m03710_a3-pure.cl
+++ b/OpenCL/m03710_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -167,7 +167,7 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl
index 7c7d0413d..e3b540658 100644
--- a/OpenCL/m03910_a0-optimized.cl
+++ b/OpenCL/m03910_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -410,7 +410,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl
index aa1d57b1b..ea196e051 100644
--- a/OpenCL/m03910_a0-pure.cl
+++ b/OpenCL/m03910_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -168,7 +168,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl
index 2ef98cd37..c28bace04 100644
--- a/OpenCL/m03910_a1-optimized.cl
+++ b/OpenCL/m03910_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -467,7 +467,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl
index baeb1a49e..7608787ed 100644
--- a/OpenCL/m03910_a1-pure.cl
+++ b/OpenCL/m03910_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -164,7 +164,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl
index faf3eef4e..d8a42b48c 100644
--- a/OpenCL/m03910_a3-optimized.cl
+++ b/OpenCL/m03910_a3-optimized.cl
@@ -773,7 +773,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -843,7 +843,7 @@ KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -913,7 +913,7 @@ KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -983,7 +983,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1053,7 +1053,7 @@ KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1123,7 +1123,7 @@ KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl
index caadebe9c..873c5485e 100644
--- a/OpenCL/m03910_a3-pure.cl
+++ b/OpenCL/m03910_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -177,7 +177,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl
index 43aa224d6..e7b4c6fd0 100644
--- a/OpenCL/m04010_a0-optimized.cl
+++ b/OpenCL/m04010_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -383,7 +383,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl
index 3a61a7461..6763f5c72 100644
--- a/OpenCL/m04010_a0-pure.cl
+++ b/OpenCL/m04010_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -149,7 +149,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl
index db8887b8c..70ee618f5 100644
--- a/OpenCL/m04010_a1-optimized.cl
+++ b/OpenCL/m04010_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -439,7 +439,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl
index 4f25cb1c6..856ec75ec 100644
--- a/OpenCL/m04010_a1-pure.cl
+++ b/OpenCL/m04010_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -145,7 +145,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl
index 7be89bdf2..8e3297cf7 100644
--- a/OpenCL/m04010_a3-optimized.cl
+++ b/OpenCL/m04010_a3-optimized.cl
@@ -673,7 +673,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -777,7 +777,7 @@ KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -847,7 +847,7 @@ KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -917,7 +917,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -987,7 +987,7 @@ KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1023,7 +1023,7 @@ KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl
index 2c8c112c0..b3b855041 100644
--- a/OpenCL/m04010_a3-pure.cl
+++ b/OpenCL/m04010_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -162,7 +162,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl
index b47546c9a..1b6e55088 100644
--- a/OpenCL/m04110_a0-optimized.cl
+++ b/OpenCL/m04110_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -428,7 +428,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl
index 8947b7726..c8e7ce93f 100644
--- a/OpenCL/m04110_a0-pure.cl
+++ b/OpenCL/m04110_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -162,7 +162,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl
index 01cd6749e..16c65cef1 100644
--- a/OpenCL/m04110_a1-optimized.cl
+++ b/OpenCL/m04110_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -486,7 +486,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl
index 60de2e1e9..011852191 100644
--- a/OpenCL/m04110_a1-pure.cl
+++ b/OpenCL/m04110_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -158,7 +158,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl
index 3c68d886b..0ca3c7898 100644
--- a/OpenCL/m04110_a3-optimized.cl
+++ b/OpenCL/m04110_a3-optimized.cl
@@ -729,7 +729,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -833,7 +833,7 @@ KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -903,7 +903,7 @@ KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -973,7 +973,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1043,7 +1043,7 @@ KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1079,7 +1079,7 @@ KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl
index 705a56903..1e33b0a62 100644
--- a/OpenCL/m04110_a3-pure.cl
+++ b/OpenCL/m04110_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -173,7 +173,7 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl
index eb14690b8..f29f76f6a 100644
--- a/OpenCL/m04310_a0-optimized.cl
+++ b/OpenCL/m04310_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -326,7 +326,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl
index fe94dfc83..422a6a5e7 100644
--- a/OpenCL/m04310_a0-pure.cl
+++ b/OpenCL/m04310_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -145,7 +145,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl
index c9bad2483..78448cf61 100644
--- a/OpenCL/m04310_a1-optimized.cl
+++ b/OpenCL/m04310_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -383,7 +383,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl
index b8fe23108..68b097fdd 100644
--- a/OpenCL/m04310_a1-pure.cl
+++ b/OpenCL/m04310_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -141,7 +141,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl
index 7fc003101..53ed7a4cb 100644
--- a/OpenCL/m04310_a3-optimized.cl
+++ b/OpenCL/m04310_a3-optimized.cl
@@ -606,7 +606,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -676,7 +676,7 @@ KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -746,7 +746,7 @@ KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -816,7 +816,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -886,7 +886,7 @@ KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -956,7 +956,7 @@ KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl
index 171b2c870..f5e5fa707 100644
--- a/OpenCL/m04310_a3-pure.cl
+++ b/OpenCL/m04310_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -154,7 +154,7 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl
index 250cde03e..4c8d31c33 100644
--- a/OpenCL/m04400_a0-optimized.cl
+++ b/OpenCL/m04400_a0-optimized.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -353,7 +353,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a0-pure.cl b/OpenCL/m04400_a0-pure.cl
index ebf3b90f5..68e8a657c 100644
--- a/OpenCL/m04400_a0-pure.cl
+++ b/OpenCL/m04400_a0-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -140,7 +140,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl
index c577e2f1e..19b432aad 100644
--- a/OpenCL/m04400_a1-optimized.cl
+++ b/OpenCL/m04400_a1-optimized.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -409,7 +409,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl
index 3841a1944..4968aa90e 100644
--- a/OpenCL/m04400_a1-pure.cl
+++ b/OpenCL/m04400_a1-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -136,7 +136,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl
index 44d0c22ee..153f13caf 100644
--- a/OpenCL/m04400_a3-optimized.cl
+++ b/OpenCL/m04400_a3-optimized.cl
@@ -578,7 +578,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -648,7 +648,7 @@ KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -718,7 +718,7 @@ KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -788,7 +788,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -858,7 +858,7 @@ KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -928,7 +928,7 @@ KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl
index a4d922d68..60c8eb745 100644
--- a/OpenCL/m04400_a3-pure.cl
+++ b/OpenCL/m04400_a3-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04400_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -149,7 +149,7 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl
index d6a9cf83b..c8cd30328 100644
--- a/OpenCL/m04500_a0-optimized.cl
+++ b/OpenCL/m04500_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -379,7 +379,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl
index a63fd8320..dcea1b78a 100644
--- a/OpenCL/m04500_a0-pure.cl
+++ b/OpenCL/m04500_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -139,7 +139,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl
index 088bf10ef..05010648b 100644
--- a/OpenCL/m04500_a1-optimized.cl
+++ b/OpenCL/m04500_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -435,7 +435,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl
index 9ffd4672a..c480f3ecf 100644
--- a/OpenCL/m04500_a1-pure.cl
+++ b/OpenCL/m04500_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -135,7 +135,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl
index a96781501..2809cd1ae 100644
--- a/OpenCL/m04500_a3-optimized.cl
+++ b/OpenCL/m04500_a3-optimized.cl
@@ -637,7 +637,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -707,7 +707,7 @@ KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -777,7 +777,7 @@ KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -847,7 +847,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -917,7 +917,7 @@ KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -987,7 +987,7 @@ KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl
index cd3a15aca..b258f97c3 100644
--- a/OpenCL/m04500_a3-pure.cl
+++ b/OpenCL/m04500_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04500_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -148,7 +148,7 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl
index da1d19413..73a119f5d 100644
--- a/OpenCL/m04520_a0-optimized.cl
+++ b/OpenCL/m04520_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -610,7 +610,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl
index 2879d8278..3e7e3034c 100644
--- a/OpenCL/m04520_a0-pure.cl
+++ b/OpenCL/m04520_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -154,7 +154,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl
index 36e773413..8bd6e12ab 100644
--- a/OpenCL/m04520_a1-optimized.cl
+++ b/OpenCL/m04520_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -666,7 +666,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl
index 4d3d47132..3556b9cc6 100644
--- a/OpenCL/m04520_a1-pure.cl
+++ b/OpenCL/m04520_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -150,7 +150,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl
index ddf48573d..f97e9dcb4 100644
--- a/OpenCL/m04520_a3-optimized.cl
+++ b/OpenCL/m04520_a3-optimized.cl
@@ -1090,7 +1090,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1160,7 +1160,7 @@ KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1230,7 +1230,7 @@ KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1300,7 +1300,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1370,7 +1370,7 @@ KERNEL_FQ void m04520_s08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1440,7 +1440,7 @@ KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl
index 8a5a0839c..c1f4aa845 100644
--- a/OpenCL/m04520_a3-pure.cl
+++ b/OpenCL/m04520_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -165,7 +165,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl
index bdb3ec553..4b7bbba9f 100644
--- a/OpenCL/m04700_a0-optimized.cl
+++ b/OpenCL/m04700_a0-optimized.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -336,7 +336,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl
index fa46ddc1a..601675b55 100644
--- a/OpenCL/m04700_a0-pure.cl
+++ b/OpenCL/m04700_a0-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -135,7 +135,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl
index 802f4aebd..7b5e61c03 100644
--- a/OpenCL/m04700_a1-optimized.cl
+++ b/OpenCL/m04700_a1-optimized.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -389,7 +389,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl
index 04d17f749..9fb80692a 100644
--- a/OpenCL/m04700_a1-pure.cl
+++ b/OpenCL/m04700_a1-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -131,7 +131,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl
index e103191d5..3d431931a 100644
--- a/OpenCL/m04700_a3-optimized.cl
+++ b/OpenCL/m04700_a3-optimized.cl
@@ -578,7 +578,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -648,7 +648,7 @@ KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -718,7 +718,7 @@ KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -788,7 +788,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -858,7 +858,7 @@ KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -928,7 +928,7 @@ KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl
index 81f473b37..633569c2b 100644
--- a/OpenCL/m04700_a3-pure.cl
+++ b/OpenCL/m04700_a3-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m04700_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -144,7 +144,7 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m05300_a0-optimized.cl b/OpenCL/m05300_a0-optimized.cl
index 5ec319e24..3897688e7 100644
--- a/OpenCL/m05300_a0-optimized.cl
+++ b/OpenCL/m05300_a0-optimized.cl
@@ -125,14 +125,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -300,14 +300,14 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05300_a1-optimized.cl b/OpenCL/m05300_a1-optimized.cl
index c4c8c955c..5f75de929 100644
--- a/OpenCL/m05300_a1-optimized.cl
+++ b/OpenCL/m05300_a1-optimized.cl
@@ -123,14 +123,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -358,14 +358,14 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl
index b4b8d2173..5735c7e80 100644
--- a/OpenCL/m05300_a3-optimized.cl
+++ b/OpenCL/m05300_a3-optimized.cl
@@ -429,14 +429,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -502,14 +502,14 @@ KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -575,14 +575,14 @@ KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -648,14 +648,14 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -721,14 +721,14 @@ KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -794,14 +794,14 @@ KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i];
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05400_a0-optimized.cl b/OpenCL/m05400_a0-optimized.cl
index 1201d0155..fe656c1cb 100644
--- a/OpenCL/m05400_a0-optimized.cl
+++ b/OpenCL/m05400_a0-optimized.cl
@@ -129,14 +129,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -313,14 +313,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05400_a1-optimized.cl b/OpenCL/m05400_a1-optimized.cl
index f90f918f2..b8b95ce91 100644
--- a/OpenCL/m05400_a1-optimized.cl
+++ b/OpenCL/m05400_a1-optimized.cl
@@ -127,14 +127,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -379,14 +379,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05400_a3-optimized.cl b/OpenCL/m05400_a3-optimized.cl
index 898fc162c..7919d7a9a 100644
--- a/OpenCL/m05400_a3-optimized.cl
+++ b/OpenCL/m05400_a3-optimized.cl
@@ -433,14 +433,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -506,14 +506,14 @@ KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -579,14 +579,14 @@ KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -652,14 +652,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -725,14 +725,14 @@ KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -798,14 +798,14 @@ KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t))
    * s_msg
    */
 
-  LOCAL_AS u32 s_nr_buf[16];
+  LOCAL_VK u32 s_nr_buf[16];
 
   for (u32 i = lid; i < 16; i += lsz)
   {
     s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]);
   }
 
-  LOCAL_AS u32 s_msg_buf[128];
+  LOCAL_VK u32 s_msg_buf[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl
index 893cbc2c1..2b1866fc3 100644
--- a/OpenCL/m05500_a0-optimized.cl
+++ b/OpenCL/m05500_a0-optimized.cl
@@ -516,8 +516,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -729,8 +729,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl
index e2087f918..db786716b 100644
--- a/OpenCL/m05500_a0-pure.cl
+++ b/OpenCL/m05500_a0-pure.cl
@@ -516,8 +516,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -640,8 +640,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl
index 2f4a8a912..48c752d9c 100644
--- a/OpenCL/m05500_a1-optimized.cl
+++ b/OpenCL/m05500_a1-optimized.cl
@@ -514,8 +514,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -780,8 +780,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl
index 6872de6cc..80e3b431a 100644
--- a/OpenCL/m05500_a1-pure.cl
+++ b/OpenCL/m05500_a1-pure.cl
@@ -514,8 +514,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -636,8 +636,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl
index 9901e818d..aecbd8664 100644
--- a/OpenCL/m05500_a3-optimized.cl
+++ b/OpenCL/m05500_a3-optimized.cl
@@ -847,8 +847,8 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -921,8 +921,8 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -995,8 +995,8 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -1069,8 +1069,8 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -1143,8 +1143,8 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -1217,8 +1217,8 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl
index 467818f9f..298317f33 100644
--- a/OpenCL/m05500_a3-pure.cl
+++ b/OpenCL/m05500_a3-pure.cl
@@ -514,8 +514,8 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -649,8 +649,8 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m05600_a0-optimized.cl b/OpenCL/m05600_a0-optimized.cl
index ec3260e7a..d0eef25cd 100644
--- a/OpenCL/m05600_a0-optimized.cl
+++ b/OpenCL/m05600_a0-optimized.cl
@@ -128,14 +128,14 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_RULES_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -367,14 +367,14 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_RULES_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m05600_a1-optimized.cl b/OpenCL/m05600_a1-optimized.cl
index d208a87d5..dec67ea96 100644
--- a/OpenCL/m05600_a1-optimized.cl
+++ b/OpenCL/m05600_a1-optimized.cl
@@ -126,14 +126,14 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -423,14 +423,14 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl
index f9204f9f0..1706db730 100644
--- a/OpenCL/m05600_a3-optimized.cl
+++ b/OpenCL/m05600_a3-optimized.cl
@@ -550,14 +550,14 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -623,14 +623,14 @@ KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -696,14 +696,14 @@ KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -769,14 +769,14 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -842,14 +842,14 @@ KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -915,14 +915,14 @@ KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t))
    * salt
    */
 
-  LOCAL_AS u32 s_userdomain_buf[64];
+  LOCAL_VK u32 s_userdomain_buf[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
     s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i];
   }
 
-  LOCAL_AS u32 s_chall_buf[256];
+  LOCAL_VK u32 s_chall_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl
index 4d013411c..4de73c5ab 100644
--- a/OpenCL/m05800-optimized.cl
+++ b/OpenCL/m05800-optimized.cl
@@ -2300,8 +2300,8 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t))
    * cache precomputed conversion table in shared memory
    */
 
-  LOCAL_AS u32 s_pc_dec[1024];
-  LOCAL_AS u32 s_pc_len[1024];
+  LOCAL_VK u32 s_pc_dec[1024];
+  LOCAL_VK u32 s_pc_len[1024];
 
   for (u32 i = lid; i < 1024; i += lsz)
   {
diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl
index 1813576b7..6e6f0d14c 100644
--- a/OpenCL/m05800-pure.cl
+++ b/OpenCL/m05800-pure.cl
@@ -2120,8 +2120,8 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t))
    * cache precomputed conversion table in shared memory
    */
 
-  LOCAL_AS u32 s_pc_dec[1024];
-  LOCAL_AS u32 s_pc_len[1024];
+  LOCAL_VK u32 s_pc_dec[1024];
+  LOCAL_VK u32 s_pc_len[1024];
 
   for (u32 i = lid; i < 1024; i += lsz)
   {
diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl
index 3c6dd15f7..7b1c6eee9 100644
--- a/OpenCL/m06100_a0-optimized.cl
+++ b/OpenCL/m06100_a0-optimized.cl
@@ -37,8 +37,8 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -177,8 +177,8 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl
index 694081d4a..408512f66 100644
--- a/OpenCL/m06100_a0-pure.cl
+++ b/OpenCL/m06100_a0-pure.cl
@@ -32,8 +32,8 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -116,8 +116,8 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl
index 78435f227..54a9f492f 100644
--- a/OpenCL/m06100_a1-optimized.cl
+++ b/OpenCL/m06100_a1-optimized.cl
@@ -35,8 +35,8 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -233,8 +233,8 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl
index 7eaa1eead..7d7693177 100644
--- a/OpenCL/m06100_a1-pure.cl
+++ b/OpenCL/m06100_a1-pure.cl
@@ -30,8 +30,8 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -112,8 +112,8 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl
index cd97b7644..e61112e07 100644
--- a/OpenCL/m06100_a3-optimized.cl
+++ b/OpenCL/m06100_a3-optimized.cl
@@ -185,8 +185,8 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -277,8 +277,8 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -373,8 +373,8 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -465,8 +465,8 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl
index ab5a989ff..8e4038c99 100644
--- a/OpenCL/m06100_a3-pure.cl
+++ b/OpenCL/m06100_a3-pure.cl
@@ -30,8 +30,8 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -125,8 +125,8 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl
index 4f3b884ac..3a857dec9 100644
--- a/OpenCL/m06211-pure.cl
+++ b/OpenCL/m06211-pure.cl
@@ -93,7 +93,7 @@ KERNEL_FQ void m06211_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -305,17 +305,17 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl
index d002aaa11..1d4d58bf5 100644
--- a/OpenCL/m06212-pure.cl
+++ b/OpenCL/m06212-pure.cl
@@ -93,7 +93,7 @@ KERNEL_FQ void m06212_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -305,17 +305,17 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl
index 48dc1f6c3..1be0dca74 100644
--- a/OpenCL/m06213-pure.cl
+++ b/OpenCL/m06213-pure.cl
@@ -93,7 +93,7 @@ KERNEL_FQ void m06213_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -305,17 +305,17 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl
index 8860e0d5c..c15153972 100644
--- a/OpenCL/m06221-pure.cl
+++ b/OpenCL/m06221-pure.cl
@@ -115,7 +115,7 @@ KERNEL_FQ void m06221_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -449,17 +449,17 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl
index 508da0bbe..4a71b9078 100644
--- a/OpenCL/m06222-pure.cl
+++ b/OpenCL/m06222-pure.cl
@@ -115,7 +115,7 @@ KERNEL_FQ void m06222_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -449,17 +449,17 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl
index 75ff65068..035266e98 100644
--- a/OpenCL/m06223-pure.cl
+++ b/OpenCL/m06223-pure.cl
@@ -115,7 +115,7 @@ KERNEL_FQ void m06223_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -449,17 +449,17 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl
index 15e94c0e0..0d4c72c63 100644
--- a/OpenCL/m06231-pure.cl
+++ b/OpenCL/m06231-pure.cl
@@ -153,7 +153,7 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -168,8 +168,8 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -380,8 +380,8 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -583,17 +583,17 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -634,8 +634,8 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl
index 378f46ffd..803f61715 100644
--- a/OpenCL/m06232-pure.cl
+++ b/OpenCL/m06232-pure.cl
@@ -153,7 +153,7 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -168,8 +168,8 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -380,8 +380,8 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -583,17 +583,17 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -634,8 +634,8 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl
index 9e1e31ad6..32553633e 100644
--- a/OpenCL/m06233-pure.cl
+++ b/OpenCL/m06233-pure.cl
@@ -153,7 +153,7 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -168,8 +168,8 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -380,8 +380,8 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -583,17 +583,17 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -634,8 +634,8 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06600-pure.cl b/OpenCL/m06600-pure.cl
index 409087e2f..db4906812 100644
--- a/OpenCL/m06600-pure.cl
+++ b/OpenCL/m06600-pure.cl
@@ -241,17 +241,17 @@ KERNEL_FQ void m06600_comp (KERN_ATTR_TMPS (agilekey_tmp_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl
index daa69e2ff..a2e95556f 100644
--- a/OpenCL/m06800-pure.cl
+++ b/OpenCL/m06800-pure.cl
@@ -272,17 +272,17 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl
index 5c3ef3850..e199d70df 100644
--- a/OpenCL/m06900_a0-optimized.cl
+++ b/OpenCL/m06900_a0-optimized.cl
@@ -712,7 +712,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -928,7 +928,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl
index 41cc12653..bdf7943bc 100644
--- a/OpenCL/m06900_a1-optimized.cl
+++ b/OpenCL/m06900_a1-optimized.cl
@@ -710,7 +710,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -980,7 +980,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl
index 4f201a4c1..5becbdca7 100644
--- a/OpenCL/m06900_a3-optimized.cl
+++ b/OpenCL/m06900_a3-optimized.cl
@@ -1070,7 +1070,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1139,7 +1139,7 @@ KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1212,7 +1212,7 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1281,7 +1281,7 @@ KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_tables[4][256];
+  LOCAL_VK u32 s_tables[4][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl
index 4da89550a..cffd768b1 100644
--- a/OpenCL/m07500_a0-optimized.cl
+++ b/OpenCL/m07500_a0-optimized.cl
@@ -50,7 +50,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type: address-space qualifier, matching the cast
 
   #ifdef _unroll
   #pragma unroll
@@ -451,9 +451,9 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_RULES_ESALT (krb5pa_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // per-work-item pointer into rc4_keys, not a local variable
 
   /**
    * loop
@@ -557,9 +557,9 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_RULES_ESALT (krb5pa_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // per-work-item pointer into rc4_keys, not a local variable
 
   /**
    * loop
diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl
index 46693cc3e..7bdf81668 100644
--- a/OpenCL/m07500_a0-pure.cl
+++ b/OpenCL/m07500_a0-pure.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type: address-space qualifier, matching the cast
 
   #ifdef _unroll
   #pragma unroll
@@ -296,9 +296,9 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // per-work-item pointer into rc4_keys, not a local variable
 
   u32 checksum[4];
 
@@ -367,9 +367,9 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl
index dc3ced845..4bf1a6a05 100644
--- a/OpenCL/m07500_a1-optimized.cl
+++ b/OpenCL/m07500_a1-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -449,9 +449,9 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -605,9 +605,9 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl
index e4da228cd..e6d11bf6d 100644
--- a/OpenCL/m07500_a1-pure.cl
+++ b/OpenCL/m07500_a1-pure.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -292,9 +292,9 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_ESALT (krb5pa_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -361,9 +361,9 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_ESALT (krb5pa_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl
index 404a66986..f8c6d73b9 100644
--- a/OpenCL/m07500_a3-optimized.cl
+++ b/OpenCL/m07500_a3-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -528,9 +528,9 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -580,9 +580,9 @@ KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -636,9 +636,9 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -688,9 +688,9 @@ KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl
index f22d7f07f..4e1257620 100644
--- a/OpenCL/m07500_a3-pure.cl
+++ b/OpenCL/m07500_a3-pure.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -313,9 +313,9 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -407,9 +407,9 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl
index 44452118c..f259ea250 100644
--- a/OpenCL/m08000_a0-optimized.cl
+++ b/OpenCL/m08000_a0-optimized.cl
@@ -228,8 +228,8 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_RULES ())
    * precompute final msg blocks
    */
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -399,8 +399,8 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_RULES ())
    * precompute final msg blocks
    */
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl
index 49c745bc6..599364f44 100644
--- a/OpenCL/m08000_a1-optimized.cl
+++ b/OpenCL/m08000_a1-optimized.cl
@@ -226,8 +226,8 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_BASIC ())
    * precompute final msg blocks
    */
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -451,8 +451,8 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_BASIC ())
    * precompute final msg blocks
    */
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl
index 2542e648e..3068a6621 100644
--- a/OpenCL/m08000_a3-optimized.cl
+++ b/OpenCL/m08000_a3-optimized.cl
@@ -489,8 +489,8 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
@@ -528,8 +528,8 @@ KERNEL_FQ void m08000_m08 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
@@ -567,8 +567,8 @@ KERNEL_FQ void m08000_m16 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
@@ -606,8 +606,8 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
@@ -645,8 +645,8 @@ KERNEL_FQ void m08000_s08 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
@@ -684,8 +684,8 @@ KERNEL_FQ void m08000_s16 (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len & 63;
 
-  LOCAL_AS u32 w_s1[64];
-  LOCAL_AS u32 w_s2[64];
+  LOCAL_VK u32 w_s1[64];
+  LOCAL_VK u32 w_s2[64];
 
   /**
    * main
diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl
index c17b8e44b..80dce4336 100644
--- a/OpenCL/m08400_a0-optimized.cl
+++ b/OpenCL/m08400_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -296,7 +296,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl
index 4c6294ef2..21a74fdfd 100644
--- a/OpenCL/m08400_a0-pure.cl
+++ b/OpenCL/m08400_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -193,7 +193,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl
index 3eab32af2..7bb51a077 100644
--- a/OpenCL/m08400_a1-optimized.cl
+++ b/OpenCL/m08400_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -352,7 +352,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl
index 450dcab4a..f78b4c31a 100644
--- a/OpenCL/m08400_a1-pure.cl
+++ b/OpenCL/m08400_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -189,7 +189,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl
index e1788a974..d981512bd 100644
--- a/OpenCL/m08400_a3-optimized.cl
+++ b/OpenCL/m08400_a3-optimized.cl
@@ -472,7 +472,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -542,7 +542,7 @@ KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -612,7 +612,7 @@ KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -682,7 +682,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -752,7 +752,7 @@ KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -822,7 +822,7 @@ KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl
index 712d789ec..e0b3463b6 100644
--- a/OpenCL/m08400_a3-pure.cl
+++ b/OpenCL/m08400_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -206,7 +206,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl
index e64afc6cc..5e0a0d044 100644
--- a/OpenCL/m08500_a0-pure.cl
+++ b/OpenCL/m08500_a0-pure.cl
@@ -536,8 +536,8 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -634,8 +634,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl
index 1a4cf0a23..511c8da0e 100644
--- a/OpenCL/m08500_a1-pure.cl
+++ b/OpenCL/m08500_a1-pure.cl
@@ -534,8 +534,8 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -690,8 +690,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl
index 2ddb4efc8..e59d35470 100644
--- a/OpenCL/m08500_a3-pure.cl
+++ b/OpenCL/m08500_a3-pure.cl
@@ -672,8 +672,8 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_VECTOR ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -746,8 +746,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl
index 04c9a3594..0e6014846 100644
--- a/OpenCL/m08600_a0-pure.cl
+++ b/OpenCL/m08600_a0-pure.cl
@@ -244,7 +244,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -308,7 +308,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl
index bb081605d..694e72ad6 100644
--- a/OpenCL/m08600_a1-pure.cl
+++ b/OpenCL/m08600_a1-pure.cl
@@ -242,7 +242,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -366,7 +366,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl
index b7b2f29df..deff07c41 100644
--- a/OpenCL/m08600_a3-pure.cl
+++ b/OpenCL/m08600_a3-pure.cl
@@ -347,7 +347,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -404,7 +404,7 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl
index add1bba63..7b63731ac 100644
--- a/OpenCL/m08700_a0-optimized.cl
+++ b/OpenCL/m08700_a0-optimized.cl
@@ -281,14 +281,14 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -468,14 +468,14 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_RULES ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl
index a8fc8adf3..240669d0c 100644
--- a/OpenCL/m08700_a1-optimized.cl
+++ b/OpenCL/m08700_a1-optimized.cl
@@ -279,14 +279,14 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -526,14 +526,14 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_BASIC ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl
index 07eaf035c..79121b96c 100644
--- a/OpenCL/m08700_a3-optimized.cl
+++ b/OpenCL/m08700_a3-optimized.cl
@@ -558,14 +558,14 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -626,14 +626,14 @@ KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -694,14 +694,14 @@ KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -762,14 +762,14 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -830,14 +830,14 @@ KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -898,14 +898,14 @@ KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ())
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl
index 59c14e6ee..faf8d729c 100644
--- a/OpenCL/m08800-pure.cl
+++ b/OpenCL/m08800-pure.cl
@@ -237,17 +237,17 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl
index e04a8594b..6959ad6b6 100644
--- a/OpenCL/m09000-pure.cl
+++ b/OpenCL/m09000-pure.cl
@@ -471,15 +471,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09
     P[i] = c_pbox[i];
   }
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
@@ -602,15 +602,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09
     P[i] = tmps[gid].P[i];
   }
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl
index 004bb3e85..38f6fc97a 100644
--- a/OpenCL/m09100-pure.cl
+++ b/OpenCL/m09100-pure.cl
@@ -409,14 +409,14 @@ KERNEL_FQ void m09100_init (KERN_ATTR_TMPS (lotus8_tmp_t))
    * sbox
    */
 
-  LOCAL_AS u32 s_lotus_magic_table[256];
+  LOCAL_VK u32 s_lotus_magic_table[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
     s_lotus_magic_table[i] = lotus_magic_table[i];
   }
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m09400-pure.cl b/OpenCL/m09400-pure.cl
index 3da6fd261..2ee3a6844 100644
--- a/OpenCL/m09400-pure.cl
+++ b/OpenCL/m09400-pure.cl
@@ -139,17 +139,17 @@ KERNEL_FQ void m09400_comp (KERN_ATTR_TMPS_ESALT (office2007_tmp_t, office2007_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m09500-pure.cl b/OpenCL/m09500-pure.cl
index 20ca9ea38..b2ebe70d9 100644
--- a/OpenCL/m09500-pure.cl
+++ b/OpenCL/m09500-pure.cl
@@ -137,17 +137,17 @@ KERNEL_FQ void m09500_comp (KERN_ATTR_TMPS_ESALT (office2010_tmp_t, office2010_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m09600-pure.cl b/OpenCL/m09600-pure.cl
index 94f47dc00..dd1520040 100644
--- a/OpenCL/m09600-pure.cl
+++ b/OpenCL/m09600-pure.cl
@@ -183,17 +183,17 @@ KERNEL_FQ void m09600_comp (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m09700_a0-optimized.cl b/OpenCL/m09700_a0-optimized.cl
index 70cdd577c..c92fbb483 100644
--- a/OpenCL/m09700_a0-optimized.cl
+++ b/OpenCL/m09700_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -527,9 +527,9 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -714,9 +714,9 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09700_a1-optimized.cl b/OpenCL/m09700_a1-optimized.cl
index 088f05c28..e404894d0 100644
--- a/OpenCL/m09700_a1-optimized.cl
+++ b/OpenCL/m09700_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -525,9 +525,9 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -770,9 +770,9 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl
index 6414b53a0..82d6d308e 100644
--- a/OpenCL/m09700_a3-optimized.cl
+++ b/OpenCL/m09700_a3-optimized.cl
@@ -43,7 +43,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -157,7 +157,7 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -563,7 +563,7 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -1012,7 +1012,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -1061,7 +1061,7 @@ KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -1110,7 +1110,7 @@ KERNEL_FQ void m09700_m16 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -1159,7 +1159,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -1208,7 +1208,7 @@ KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -1257,7 +1257,7 @@ KERNEL_FQ void m09700_s16 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m09710_a0-optimized.cl b/OpenCL/m09710_a0-optimized.cl
index 33c218185..a819ab28b 100644
--- a/OpenCL/m09710_a0-optimized.cl
+++ b/OpenCL/m09710_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,9 +183,9 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -321,9 +321,9 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09710_a1-optimized.cl b/OpenCL/m09710_a1-optimized.cl
index fc6ff0cd7..5bf2afb72 100644
--- a/OpenCL/m09710_a1-optimized.cl
+++ b/OpenCL/m09710_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -181,9 +181,9 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -365,9 +365,9 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl
index fb3df2602..9def38990 100644
--- a/OpenCL/m09710_a3-optimized.cl
+++ b/OpenCL/m09710_a3-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -160,7 +160,7 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -269,7 +269,7 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -421,7 +421,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -478,7 +478,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl
index 3646b66eb..7b5847753 100644
--- a/OpenCL/m09800_a0-optimized.cl
+++ b/OpenCL/m09800_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,9 +183,9 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -383,9 +383,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl
index becffa65e..d78f9f799 100644
--- a/OpenCL/m09800_a1-optimized.cl
+++ b/OpenCL/m09800_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -181,9 +181,9 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -431,9 +431,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl
index 07f9c09a5..60c77499e 100644
--- a/OpenCL/m09800_a3-optimized.cl
+++ b/OpenCL/m09800_a3-optimized.cl
@@ -43,7 +43,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -157,7 +157,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -325,7 +325,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -536,7 +536,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -585,7 +585,7 @@ KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -634,7 +634,7 @@ KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -683,7 +683,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -732,7 +732,7 @@ KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -781,7 +781,7 @@ KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl
index e5ddf247a..7d68831f8 100644
--- a/OpenCL/m09810_a0-optimized.cl
+++ b/OpenCL/m09810_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,9 +183,9 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -308,9 +308,9 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl
index ce654900d..a94225f9f 100644
--- a/OpenCL/m09810_a1-optimized.cl
+++ b/OpenCL/m09810_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -181,9 +181,9 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -352,9 +352,9 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl
index 360170bbe..c4dbe0a54 100644
--- a/OpenCL/m09810_a3-optimized.cl
+++ b/OpenCL/m09810_a3-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -160,7 +160,7 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -254,7 +254,7 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -391,7 +391,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -440,7 +440,7 @@ KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -489,7 +489,7 @@ KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -538,7 +538,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -587,7 +587,7 @@ KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -636,7 +636,7 @@ KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl
index fbd378c0b..8460b1d5b 100644
--- a/OpenCL/m10400_a0-optimized.cl
+++ b/OpenCL/m10400_a0-optimized.cl
@@ -72,7 +72,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -192,9 +192,9 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -368,9 +368,9 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl
index 5ad30d180..15337df7f 100644
--- a/OpenCL/m10400_a1-optimized.cl
+++ b/OpenCL/m10400_a1-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -190,9 +190,9 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -426,9 +426,9 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl
index 5f134e4d5..dcc1e12f6 100644
--- a/OpenCL/m10400_a3-optimized.cl
+++ b/OpenCL/m10400_a3-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -169,7 +169,7 @@ DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -335,7 +335,7 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -544,7 +544,7 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -593,7 +593,7 @@ KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -642,7 +642,7 @@ KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -691,7 +691,7 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -740,7 +740,7 @@ KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -789,7 +789,7 @@ KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl
index ede8b87ea..db2aec224 100644
--- a/OpenCL/m10410_a0-optimized.cl
+++ b/OpenCL/m10410_a0-optimized.cl
@@ -72,7 +72,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -192,8 +192,8 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -264,8 +264,8 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * digest
diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl
index 493638f0b..4ba60e88c 100644
--- a/OpenCL/m10410_a1-optimized.cl
+++ b/OpenCL/m10410_a1-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -190,9 +190,9 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -332,9 +332,9 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl
index 0513d4053..30d26f4d9 100644
--- a/OpenCL/m10410_a3-optimized.cl
+++ b/OpenCL/m10410_a3-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -169,7 +169,7 @@ DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -212,7 +212,7 @@ DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * digest
@@ -298,7 +298,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -347,7 +347,7 @@ KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -396,7 +396,7 @@ KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -445,7 +445,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -494,7 +494,7 @@ KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -543,7 +543,7 @@ KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
   m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl
index 844382357..2ba76ba62 100644
--- a/OpenCL/m10500-pure.cl
+++ b/OpenCL/m10500-pure.cl
@@ -76,7 +76,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -377,9 +377,9 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index 5925656f1..a44924c28 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -594,11 +594,11 @@ KERNEL_FQ void m10700_loop (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl
index c302be978..f43c024cf 100644
--- a/OpenCL/m10700-pure.cl
+++ b/OpenCL/m10700-pure.cl
@@ -1195,11 +1195,11 @@ KERNEL_FQ void m10700_loop (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl
index 1070b79f4..2f77f1366 100644
--- a/OpenCL/m11100_a0-optimized.cl
+++ b/OpenCL/m11100_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -376,7 +376,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl
index 70d5554e3..5896ea564 100644
--- a/OpenCL/m11100_a0-pure.cl
+++ b/OpenCL/m11100_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -188,7 +188,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl
index 87eff0525..1f003b012 100644
--- a/OpenCL/m11100_a1-optimized.cl
+++ b/OpenCL/m11100_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -434,7 +434,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl
index 6a41a9337..08c85eea5 100644
--- a/OpenCL/m11100_a1-pure.cl
+++ b/OpenCL/m11100_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -184,7 +184,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl
index 020f60870..6e1abef0c 100644
--- a/OpenCL/m11100_a3-optimized.cl
+++ b/OpenCL/m11100_a3-optimized.cl
@@ -665,7 +665,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -735,7 +735,7 @@ KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -805,7 +805,7 @@ KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -875,7 +875,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -945,7 +945,7 @@ KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -1015,7 +1015,7 @@ KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl
index e45643862..68b100050 100644
--- a/OpenCL/m11100_a3-pure.cl
+++ b/OpenCL/m11100_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -227,7 +227,7 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl
index 0bd10ab73..c734b102c 100644
--- a/OpenCL/m11300-pure.cl
+++ b/OpenCL/m11300-pure.cl
@@ -220,17 +220,17 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl
index 76bcdf904..d31492c77 100644
--- a/OpenCL/m11400_a0-pure.cl
+++ b/OpenCL/m11400_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_RULES_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -150,7 +150,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_RULES_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl
index 7059d4686..f691a8f11 100644
--- a/OpenCL/m11400_a1-pure.cl
+++ b/OpenCL/m11400_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -144,7 +144,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl
index aecf5fd78..595eb30ec 100644
--- a/OpenCL/m11400_a3-pure.cl
+++ b/OpenCL/m11400_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_VECTOR_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -170,7 +170,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_VECTOR_ESALT (sip_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a0-optimized.cl b/OpenCL/m11700_a0-optimized.cl
index 736355ac8..8a474ec3a 100644
--- a/OpenCL/m11700_a0-optimized.cl
+++ b/OpenCL/m11700_a0-optimized.cl
@@ -101,7 +101,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_RULES ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -259,7 +259,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_RULES ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a0-pure.cl b/OpenCL/m11700_a0-pure.cl
index 51daa55e4..30476e4de 100644
--- a/OpenCL/m11700_a0-pure.cl
+++ b/OpenCL/m11700_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -105,7 +105,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a1-optimized.cl b/OpenCL/m11700_a1-optimized.cl
index cf0b15b75..3df9793a0 100644
--- a/OpenCL/m11700_a1-optimized.cl
+++ b/OpenCL/m11700_a1-optimized.cl
@@ -99,7 +99,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -315,7 +315,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a1-pure.cl b/OpenCL/m11700_a1-pure.cl
index 961c6f91a..703f3a2b8 100644
--- a/OpenCL/m11700_a1-pure.cl
+++ b/OpenCL/m11700_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -101,7 +101,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl
index a63537bad..490d882ee 100644
--- a/OpenCL/m11700_a3-optimized.cl
+++ b/OpenCL/m11700_a3-optimized.cl
@@ -268,7 +268,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -332,7 +332,7 @@ KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -396,7 +396,7 @@ KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -460,7 +460,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -524,7 +524,7 @@ KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -588,7 +588,7 @@ KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11700_a3-pure.cl b/OpenCL/m11700_a3-pure.cl
index 468d149ce..e37e72a1b 100644
--- a/OpenCL/m11700_a3-pure.cl
+++ b/OpenCL/m11700_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11700_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -114,7 +114,7 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11750_a0-pure.cl b/OpenCL/m11750_a0-pure.cl
index 398cd4271..d7286cf90 100644
--- a/OpenCL/m11750_a0-pure.cl
+++ b/OpenCL/m11750_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -114,7 +114,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11750_a1-pure.cl b/OpenCL/m11750_a1-pure.cl
index 010ba1439..c2499b6fe 100644
--- a/OpenCL/m11750_a1-pure.cl
+++ b/OpenCL/m11750_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -137,7 +137,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11750_a3-pure.cl b/OpenCL/m11750_a3-pure.cl
index f534c1e7c..6d0d5eeae 100644
--- a/OpenCL/m11750_a3-pure.cl
+++ b/OpenCL/m11750_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -123,7 +123,7 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11760_a0-pure.cl b/OpenCL/m11760_a0-pure.cl
index e0a9c80b5..b7759a991 100644
--- a/OpenCL/m11760_a0-pure.cl
+++ b/OpenCL/m11760_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -116,7 +116,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11760_a1-pure.cl b/OpenCL/m11760_a1-pure.cl
index e29eab43b..d123d394c 100644
--- a/OpenCL/m11760_a1-pure.cl
+++ b/OpenCL/m11760_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -139,7 +139,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11760_a3-pure.cl b/OpenCL/m11760_a3-pure.cl
index ad3927051..3738364d3 100644
--- a/OpenCL/m11760_a3-pure.cl
+++ b/OpenCL/m11760_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -125,7 +125,7 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a0-optimized.cl b/OpenCL/m11800_a0-optimized.cl
index 969da8abb..233a39593 100644
--- a/OpenCL/m11800_a0-optimized.cl
+++ b/OpenCL/m11800_a0-optimized.cl
@@ -101,7 +101,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_RULES ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -259,7 +259,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_RULES ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a0-pure.cl b/OpenCL/m11800_a0-pure.cl
index f688af4a3..724ccac23 100644
--- a/OpenCL/m11800_a0-pure.cl
+++ b/OpenCL/m11800_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -105,7 +105,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a1-optimized.cl b/OpenCL/m11800_a1-optimized.cl
index 9b469e958..4e082982c 100644
--- a/OpenCL/m11800_a1-optimized.cl
+++ b/OpenCL/m11800_a1-optimized.cl
@@ -99,7 +99,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -315,7 +315,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a1-pure.cl b/OpenCL/m11800_a1-pure.cl
index d65ed1932..449d8d6e7 100644
--- a/OpenCL/m11800_a1-pure.cl
+++ b/OpenCL/m11800_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -101,7 +101,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl
index 8ccf6bac4..77d8a99aa 100644
--- a/OpenCL/m11800_a3-optimized.cl
+++ b/OpenCL/m11800_a3-optimized.cl
@@ -268,7 +268,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -332,7 +332,7 @@ KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -396,7 +396,7 @@ KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -460,7 +460,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -524,7 +524,7 @@ KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -588,7 +588,7 @@ KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ())
    * shared lookup table
    */
 
-  LOCAL_AS u64 s_sbob_sl64[8][256];
+  LOCAL_VK u64 s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11800_a3-pure.cl b/OpenCL/m11800_a3-pure.cl
index e687e5ac8..101d13daa 100644
--- a/OpenCL/m11800_a3-pure.cl
+++ b/OpenCL/m11800_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11800_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -114,7 +114,7 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11850_a0-pure.cl b/OpenCL/m11850_a0-pure.cl
index ec7347b97..8182969d4 100644
--- a/OpenCL/m11850_a0-pure.cl
+++ b/OpenCL/m11850_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -114,7 +114,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11850_a1-pure.cl b/OpenCL/m11850_a1-pure.cl
index 23cb79a7c..6fd6ad1d9 100644
--- a/OpenCL/m11850_a1-pure.cl
+++ b/OpenCL/m11850_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -137,7 +137,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11850_a3-pure.cl b/OpenCL/m11850_a3-pure.cl
index bb9295311..e17e32cb6 100644
--- a/OpenCL/m11850_a3-pure.cl
+++ b/OpenCL/m11850_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -123,7 +123,7 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11860_a0-pure.cl b/OpenCL/m11860_a0-pure.cl
index 38b7e80fe..5f89f576f 100644
--- a/OpenCL/m11860_a0-pure.cl
+++ b/OpenCL/m11860_a0-pure.cl
@@ -32,7 +32,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -116,7 +116,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_RULES ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11860_a1-pure.cl b/OpenCL/m11860_a1-pure.cl
index 698365296..8a14bfa14 100644
--- a/OpenCL/m11860_a1-pure.cl
+++ b/OpenCL/m11860_a1-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -139,7 +139,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_BASIC ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m11860_a3-pure.cl b/OpenCL/m11860_a3-pure.cl
index e1b70ecf2..e8d57aff9 100644
--- a/OpenCL/m11860_a3-pure.cl
+++ b/OpenCL/m11860_a3-pure.cl
@@ -30,7 +30,7 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -125,7 +125,7 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_VECTOR ())
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl
index 43002fca5..7e913a518 100644
--- a/OpenCL/m12400-pure.cl
+++ b/OpenCL/m12400-pure.cl
@@ -511,8 +511,8 @@ KERNEL_FQ void m12400_init (KERN_ATTR_TMPS (bsdicrypt_tmp_t))
    * sbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -647,8 +647,8 @@ KERNEL_FQ void m12400_loop (KERN_ATTR_TMPS (bsdicrypt_tmp_t))
    * sbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl
index d07c43cb1..dbcd97121 100644
--- a/OpenCL/m12500-pure.cl
+++ b/OpenCL/m12500-pure.cl
@@ -292,17 +292,17 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl
index 75a91fbb7..a723b3f1d 100644
--- a/OpenCL/m12600_a0-optimized.cl
+++ b/OpenCL/m12600_a0-optimized.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -383,7 +383,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl
index 8f7dcc23c..6519cc9d1 100644
--- a/OpenCL/m12600_a0-pure.cl
+++ b/OpenCL/m12600_a0-pure.cl
@@ -43,7 +43,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -179,7 +179,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl
index c2b5796bf..76d5f1537 100644
--- a/OpenCL/m12600_a1-optimized.cl
+++ b/OpenCL/m12600_a1-optimized.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -439,7 +439,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl
index 6191cbe79..420f8491f 100644
--- a/OpenCL/m12600_a1-pure.cl
+++ b/OpenCL/m12600_a1-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -175,7 +175,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl
index 267de8f0d..65d99f23d 100644
--- a/OpenCL/m12600_a3-optimized.cl
+++ b/OpenCL/m12600_a3-optimized.cl
@@ -638,7 +638,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -708,7 +708,7 @@ KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -778,7 +778,7 @@ KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -848,7 +848,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -918,7 +918,7 @@ KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -988,7 +988,7 @@ KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl
index a335c7330..23f23023f 100644
--- a/OpenCL/m12600_a3-pure.cl
+++ b/OpenCL/m12600_a3-pure.cl
@@ -41,7 +41,7 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -188,7 +188,7 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12700-pure.cl b/OpenCL/m12700-pure.cl
index a8b5710ce..8fc00d476 100644
--- a/OpenCL/m12700-pure.cl
+++ b/OpenCL/m12700-pure.cl
@@ -250,17 +250,17 @@ KERNEL_FQ void m12700_comp (KERN_ATTR_TMPS (mywallet_tmp_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl
index 28887af9d..702932163 100644
--- a/OpenCL/m12800-pure.cl
+++ b/OpenCL/m12800-pure.cl
@@ -90,7 +90,7 @@ KERNEL_FQ void m12800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
    * lookup ascii table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl
index c3c972c4f..a98b0e95d 100644
--- a/OpenCL/m13100_a0-optimized.cl
+++ b/OpenCL/m13100_a0-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -610,9 +610,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -708,9 +708,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl
index ef875c481..1ce743e85 100644
--- a/OpenCL/m13100_a0-pure.cl
+++ b/OpenCL/m13100_a0-pure.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -404,9 +404,9 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -466,9 +466,9 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl
index 48bb36712..406980f20 100644
--- a/OpenCL/m13100_a1-optimized.cl
+++ b/OpenCL/m13100_a1-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -607,9 +607,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -754,9 +754,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl
index 0c770b0fe..907a274b1 100644
--- a/OpenCL/m13100_a1-pure.cl
+++ b/OpenCL/m13100_a1-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -400,9 +400,9 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -460,9 +460,9 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl
index 89d850968..b8c27d187 100644
--- a/OpenCL/m13100_a3-optimized.cl
+++ b/OpenCL/m13100_a3-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -677,9 +677,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -729,9 +729,9 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -785,9 +785,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -837,9 +837,9 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl
index 9ce8b4617..d8751b9b3 100644
--- a/OpenCL/m13100_a3-pure.cl
+++ b/OpenCL/m13100_a3-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -409,9 +409,9 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -482,9 +482,9 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl
index 1163d0d67..68e7d6048 100644
--- a/OpenCL/m13200-pure.cl
+++ b/OpenCL/m13200-pure.cl
@@ -86,17 +86,17 @@ KERNEL_FQ void m13200_loop (KERN_ATTR_TMPS (axcrypt_tmp_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13400-pure.cl b/OpenCL/m13400-pure.cl
index 420d303cf..1ad66422f 100644
--- a/OpenCL/m13400-pure.cl
+++ b/OpenCL/m13400-pure.cl
@@ -176,11 +176,11 @@ KERNEL_FQ void m13400_loop (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -264,17 +264,17 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl
index 9fea1d790..7f2551090 100644
--- a/OpenCL/m13711-pure.cl
+++ b/OpenCL/m13711-pure.cl
@@ -134,7 +134,7 @@ KERNEL_FQ void m13711_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -261,17 +261,17 @@ KERNEL_FQ void m13711_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -445,17 +445,17 @@ KERNEL_FQ void m13711_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl
index ee75b7170..0de2c68b1 100644
--- a/OpenCL/m13712-pure.cl
+++ b/OpenCL/m13712-pure.cl
@@ -185,7 +185,7 @@ KERNEL_FQ void m13712_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -312,17 +312,17 @@ KERNEL_FQ void m13712_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -497,17 +497,17 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl
index c9af26c73..75384ffe8 100644
--- a/OpenCL/m13713-pure.cl
+++ b/OpenCL/m13713-pure.cl
@@ -250,7 +250,7 @@ KERNEL_FQ void m13713_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -377,17 +377,17 @@ KERNEL_FQ void m13713_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -563,17 +563,17 @@ KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl
index 2dbcab437..660c2cc72 100644
--- a/OpenCL/m13721-pure.cl
+++ b/OpenCL/m13721-pure.cl
@@ -156,7 +156,7 @@ KERNEL_FQ void m13721_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -364,17 +364,17 @@ KERNEL_FQ void m13721_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -592,17 +592,17 @@ KERNEL_FQ void m13721_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl
index d5dff2be6..32a3ff5f4 100644
--- a/OpenCL/m13722-pure.cl
+++ b/OpenCL/m13722-pure.cl
@@ -207,7 +207,7 @@ KERNEL_FQ void m13722_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -415,17 +415,17 @@ KERNEL_FQ void m13722_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -644,17 +644,17 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl
index 6d895fe4f..c8d036a19 100644
--- a/OpenCL/m13723-pure.cl
+++ b/OpenCL/m13723-pure.cl
@@ -272,7 +272,7 @@ KERNEL_FQ void m13723_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -480,17 +480,17 @@ KERNEL_FQ void m13723_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -710,17 +710,17 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl
index 98a395b95..dbb75abcd 100644
--- a/OpenCL/m13731-pure.cl
+++ b/OpenCL/m13731-pure.cl
@@ -194,7 +194,7 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -209,8 +209,8 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -421,17 +421,17 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -472,8 +472,8 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -732,17 +732,17 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -783,8 +783,8 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl
index 25b544d59..4857bdc91 100644
--- a/OpenCL/m13732-pure.cl
+++ b/OpenCL/m13732-pure.cl
@@ -245,7 +245,7 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -260,8 +260,8 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -472,17 +472,17 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -523,8 +523,8 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -784,17 +784,17 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -835,8 +835,8 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl
index 083b29cdb..c7cce43af 100644
--- a/OpenCL/m13733-pure.cl
+++ b/OpenCL/m13733-pure.cl
@@ -310,7 +310,7 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -325,8 +325,8 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -537,17 +537,17 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -588,8 +588,8 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -850,17 +850,17 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -901,8 +901,8 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_Ch[8][256];
-  LOCAL_AS u32 s_Cl[8][256];
+  LOCAL_VK u32 s_Ch[8][256];
+  LOCAL_VK u32 s_Cl[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl
index 33132d538..8ba26eeb7 100644
--- a/OpenCL/m13751-pure.cl
+++ b/OpenCL/m13751-pure.cl
@@ -140,7 +140,7 @@ KERNEL_FQ void m13751_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -296,17 +296,17 @@ KERNEL_FQ void m13751_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -533,17 +533,17 @@ KERNEL_FQ void m13751_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl
index 022a35d4e..48d7f5c99 100644
--- a/OpenCL/m13752-pure.cl
+++ b/OpenCL/m13752-pure.cl
@@ -191,7 +191,7 @@ KERNEL_FQ void m13752_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -347,17 +347,17 @@ KERNEL_FQ void m13752_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -556,17 +556,17 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl
index 2f704e2ab..21fddcae8 100644
--- a/OpenCL/m13753-pure.cl
+++ b/OpenCL/m13753-pure.cl
@@ -256,7 +256,7 @@ KERNEL_FQ void m13753_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -412,17 +412,17 @@ KERNEL_FQ void m13753_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -622,17 +622,17 @@ KERNEL_FQ void m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl
index bb03c67f6..7bbbb6a55 100644
--- a/OpenCL/m13771-pure.cl
+++ b/OpenCL/m13771-pure.cl
@@ -182,7 +182,7 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -193,7 +193,7 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -380,17 +380,17 @@ KERNEL_FQ void m13771_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -407,7 +407,7 @@ KERNEL_FQ void m13771_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -625,17 +625,17 @@ KERNEL_FQ void m13771_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl
index 98598e694..467ceba89 100644
--- a/OpenCL/m13772-pure.cl
+++ b/OpenCL/m13772-pure.cl
@@ -233,7 +233,7 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -244,7 +244,7 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -431,17 +431,17 @@ KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -458,7 +458,7 @@ KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -677,17 +677,17 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl
index 5bfae6e2d..9d888bfc1 100644
--- a/OpenCL/m13773-pure.cl
+++ b/OpenCL/m13773-pure.cl
@@ -298,7 +298,7 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt;
 
-  LOCAL_AS keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
+  LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -309,7 +309,7 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -496,17 +496,17 @@ KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -523,7 +523,7 @@ KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
     s_te4[i] = te4[i];
   }
 
-  LOCAL_AS u64a s_sbob_sl64[8][256];
+  LOCAL_VK u64a s_sbob_sl64[8][256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -743,17 +743,17 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl
index 7df76fae4..0a4f61175 100644
--- a/OpenCL/m13800_a0-optimized.cl
+++ b/OpenCL/m13800_a0-optimized.cl
@@ -435,7 +435,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_RULES_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -631,7 +631,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_RULES_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl
index eaf6d0f58..8073a941e 100644
--- a/OpenCL/m13800_a1-optimized.cl
+++ b/OpenCL/m13800_a1-optimized.cl
@@ -433,7 +433,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -685,7 +685,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl
index 8848869a0..692a64e89 100644
--- a/OpenCL/m13800_a3-optimized.cl
+++ b/OpenCL/m13800_a3-optimized.cl
@@ -747,7 +747,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -804,7 +804,7 @@ KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -861,7 +861,7 @@ KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -918,7 +918,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -975,7 +975,7 @@ KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
@@ -1032,7 +1032,7 @@ KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t))
    * shared
    */
 
-  LOCAL_AS u32 s_esalt[32];
+  LOCAL_VK u32 s_esalt[32];
 
   for (u32 i = lid; i < 32; i += lsz)
   {
diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl
index 5477b7b35..35c823240 100644
--- a/OpenCL/m13900_a0-optimized.cl
+++ b/OpenCL/m13900_a0-optimized.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -269,7 +269,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl
index 050384494..86e2825d9 100644
--- a/OpenCL/m13900_a0-pure.cl
+++ b/OpenCL/m13900_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -193,7 +193,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl
index d9a53dd41..f6eed8e47 100644
--- a/OpenCL/m13900_a1-optimized.cl
+++ b/OpenCL/m13900_a1-optimized.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -325,7 +325,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl
index 3435b5dbf..1f97e165b 100644
--- a/OpenCL/m13900_a1-pure.cl
+++ b/OpenCL/m13900_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -189,7 +189,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl
index b02da1e95..a28ac46e5 100644
--- a/OpenCL/m13900_a3-optimized.cl
+++ b/OpenCL/m13900_a3-optimized.cl
@@ -429,7 +429,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -499,7 +499,7 @@ KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -569,7 +569,7 @@ KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -639,7 +639,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -709,7 +709,7 @@ KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -779,7 +779,7 @@ KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl
index 73eadd5ca..a109bb039 100644
--- a/OpenCL/m13900_a3-pure.cl
+++ b/OpenCL/m13900_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_VECTOR ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -206,7 +206,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_VECTOR ())
    * shared
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl
index 5a8a3c39c..86611ffa8 100644
--- a/OpenCL/m14000_a0-pure.cl
+++ b/OpenCL/m14000_a0-pure.cl
@@ -511,8 +511,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -616,8 +616,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl
index f8f55cf2e..ee2f6f49f 100644
--- a/OpenCL/m14000_a1-pure.cl
+++ b/OpenCL/m14000_a1-pure.cl
@@ -501,8 +501,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -649,8 +649,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl
index 42b06753b..eeb86866e 100644
--- a/OpenCL/m14100_a0-pure.cl
+++ b/OpenCL/m14100_a0-pure.cl
@@ -555,8 +555,8 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -690,8 +690,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_RULES ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl
index f44756c2b..a96243e94 100644
--- a/OpenCL/m14100_a1-pure.cl
+++ b/OpenCL/m14100_a1-pure.cl
@@ -545,8 +545,8 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -728,8 +728,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl
index 854e1c36b..980de5f19 100644
--- a/OpenCL/m14100_a3-pure.cl
+++ b/OpenCL/m14100_a3-pure.cl
@@ -727,8 +727,8 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -801,8 +801,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ())
    * shared
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl
index 9e24a41e0..9d51ac908 100644
--- a/OpenCL/m14400_a0-optimized.cl
+++ b/OpenCL/m14400_a0-optimized.cl
@@ -132,7 +132,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -403,7 +403,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl
index 51df83735..178eac247 100644
--- a/OpenCL/m14400_a0-pure.cl
+++ b/OpenCL/m14400_a0-pure.cl
@@ -42,7 +42,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -289,7 +289,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_RULES ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl
index 0d0376a4a..c2702dd1e 100644
--- a/OpenCL/m14400_a1-optimized.cl
+++ b/OpenCL/m14400_a1-optimized.cl
@@ -132,7 +132,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -467,7 +467,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl
index 822fa5248..1a6dd7c89 100644
--- a/OpenCL/m14400_a1-pure.cl
+++ b/OpenCL/m14400_a1-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -285,7 +285,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl
index 47d23112a..cb3d80b0e 100644
--- a/OpenCL/m14400_a3-optimized.cl
+++ b/OpenCL/m14400_a3-optimized.cl
@@ -642,7 +642,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -712,7 +712,7 @@ KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -782,7 +782,7 @@ KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -852,7 +852,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -922,7 +922,7 @@ KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -992,7 +992,7 @@ KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl
index 97cbb31ca..eaea0495d 100644
--- a/OpenCL/m14400_a3-pure.cl
+++ b/OpenCL/m14400_a3-pure.cl
@@ -40,7 +40,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -310,7 +310,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_VECTOR ())
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl
index 02f12ab4f..6869390c2 100644
--- a/OpenCL/m14611-pure.cl
+++ b/OpenCL/m14611-pure.cl
@@ -304,17 +304,17 @@ KERNEL_FQ void m14611_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl
index deec44318..69d0f8582 100644
--- a/OpenCL/m14621-pure.cl
+++ b/OpenCL/m14621-pure.cl
@@ -343,17 +343,17 @@ KERNEL_FQ void m14621_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl
index 8b65f8975..d85572cb6 100644
--- a/OpenCL/m14631-pure.cl
+++ b/OpenCL/m14631-pure.cl
@@ -399,17 +399,17 @@ KERNEL_FQ void m14631_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl
index 26ba30663..19e9829c0 100644
--- a/OpenCL/m14641-pure.cl
+++ b/OpenCL/m14641-pure.cl
@@ -304,17 +304,17 @@ KERNEL_FQ void m14641_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl
index bf594f354..fab345353 100644
--- a/OpenCL/m14700-pure.cl
+++ b/OpenCL/m14700-pure.cl
@@ -240,17 +240,17 @@ KERNEL_FQ void m14700_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl
index 106c0ee48..93495bfbb 100644
--- a/OpenCL/m14800-pure.cl
+++ b/OpenCL/m14800-pure.cl
@@ -504,17 +504,17 @@ KERNEL_FQ void m14800_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl
index 16cf26446..f2d6f4c98 100644
--- a/OpenCL/m14900_a0-optimized.cl
+++ b/OpenCL/m14900_a0-optimized.cl
@@ -119,7 +119,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_RULES ())
    * s_ftable
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -210,7 +210,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_RULES ())
    * s_ftable
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl
index b9753524c..14722335c 100644
--- a/OpenCL/m14900_a1-optimized.cl
+++ b/OpenCL/m14900_a1-optimized.cl
@@ -117,7 +117,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ())
    * s_ftable
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -272,7 +272,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ())
    * s_ftable
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl
index a64f22df7..bd89b8874 100644
--- a/OpenCL/m14900_a3-optimized.cl
+++ b/OpenCL/m14900_a3-optimized.cl
@@ -225,7 +225,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -299,7 +299,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u8 s_ftable[256];
+  LOCAL_VK u8 s_ftable[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl
index b6abc46d1..9a2b01cf3 100644
--- a/OpenCL/m15300-pure.cl
+++ b/OpenCL/m15300-pure.cl
@@ -384,8 +384,8 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl
index ae5455665..2c2dd5adb 100644
--- a/OpenCL/m15900-pure.cl
+++ b/OpenCL/m15900-pure.cl
@@ -513,17 +513,17 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl
index c78cbff01..a7f968a00 100644
--- a/OpenCL/m16000_a0-pure.cl
+++ b/OpenCL/m16000_a0-pure.cl
@@ -508,8 +508,8 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -532,7 +532,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -601,8 +601,8 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_RULES ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -625,7 +625,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_RULES ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl
index 578021fe8..653574dc6 100644
--- a/OpenCL/m16000_a1-pure.cl
+++ b/OpenCL/m16000_a1-pure.cl
@@ -506,8 +506,8 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -530,7 +530,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -678,8 +678,8 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_BASIC ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -702,7 +702,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_BASIC ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl
index 12e73ed34..d847db740 100644
--- a/OpenCL/m16000_a3-pure.cl
+++ b/OpenCL/m16000_a3-pure.cl
@@ -506,8 +506,8 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -530,7 +530,7 @@ KERNEL_FQ void m16000_mxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
@@ -627,8 +627,8 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_VECTOR ())
    * sbox, kbox
    */
 
-  LOCAL_AS u32 s_SPtrans[8][64];
-  LOCAL_AS u32 s_skb[8][64];
+  LOCAL_VK u32 s_SPtrans[8][64];
+  LOCAL_VK u32 s_skb[8][64];
 
   for (u32 i = lid; i < 64; i += lsz)
   {
@@ -651,7 +651,7 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_VECTOR ())
     s_skb[7][i] = c_skb[7][i];
   }
 
-  LOCAL_AS u32 s_tripcode_salt[128];
+  LOCAL_VK u32 s_tripcode_salt[128];
 
   for (u32 i = lid; i < 128; i += lsz)
   {
diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl
index eb9b4fc6f..5f48e8378 100644
--- a/OpenCL/m16200-pure.cl
+++ b/OpenCL/m16200-pure.cl
@@ -281,17 +281,17 @@ KERNEL_FQ void m16200_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl
index 5201a8ccc..beeba498d 100644
--- a/OpenCL/m16300-pure.cl
+++ b/OpenCL/m16300-pure.cl
@@ -415,17 +415,17 @@ KERNEL_FQ void m16300_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl
index 425217eb7..61cd6a5ae 100644
--- a/OpenCL/m16600_a0-optimized.cl
+++ b/OpenCL/m16600_a0-optimized.cl
@@ -41,17 +41,17 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -426,17 +426,17 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl
index ed4328db1..3e062770f 100644
--- a/OpenCL/m16600_a0-pure.cl
+++ b/OpenCL/m16600_a0-pure.cl
@@ -41,17 +41,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -232,17 +232,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl
index fdb694008..f67fc3205 100644
--- a/OpenCL/m16600_a1-optimized.cl
+++ b/OpenCL/m16600_a1-optimized.cl
@@ -39,17 +39,17 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -482,17 +482,17 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl
index 1ac65adad..85b62b520 100644
--- a/OpenCL/m16600_a1-pure.cl
+++ b/OpenCL/m16600_a1-pure.cl
@@ -39,17 +39,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -228,17 +228,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl
index 389124a7c..edc13bfd6 100644
--- a/OpenCL/m16600_a3-optimized.cl
+++ b/OpenCL/m16600_a3-optimized.cl
@@ -336,17 +336,17 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -431,17 +431,17 @@ KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -526,17 +526,17 @@ KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -621,17 +621,17 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -716,17 +716,17 @@ KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -811,17 +811,17 @@ KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl
index ddd3d45e3..8c46f555e 100644
--- a/OpenCL/m16600_a3-pure.cl
+++ b/OpenCL/m16600_a3-pure.cl
@@ -39,17 +39,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -241,17 +241,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl
index 091be57aa..e980572b3 100644
--- a/OpenCL/m18200_a0-optimized.cl
+++ b/OpenCL/m18200_a0-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type only: qualifier must match the cast, no storage is declared
 
   #ifdef _unroll
   #pragma unroll
@@ -608,9 +608,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   /**
    * salt
@@ -706,9 +706,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   /**
    * salt
diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl
index 878c7b175..49b1f127b 100644
--- a/OpenCL/m18200_a0-pure.cl
+++ b/OpenCL/m18200_a0-pure.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type only: qualifier must match the cast, no storage is declared
 
   #ifdef _unroll
   #pragma unroll
@@ -402,9 +402,9 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   u32 checksum[4];
 
@@ -464,9 +464,9 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   COPY_PW (pws[gid]);
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl
index c9a7e49d3..1fdfd2d8f 100644
--- a/OpenCL/m18200_a1-optimized.cl
+++ b/OpenCL/m18200_a1-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type only: qualifier must match the cast, no storage is declared
 
   #ifdef _unroll
   #pragma unroll
@@ -605,9 +605,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   /**
    * salt
@@ -752,9 +752,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
    * shared
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   /**
    * salt
diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl
index 258f3323b..08ee938fd 100644
--- a/OpenCL/m18200_a1-pure.cl
+++ b/OpenCL/m18200_a1-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type only: qualifier must match the cast, no storage is declared
 
   #ifdef _unroll
   #pragma unroll
@@ -398,9 +398,9 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   u32 checksum[4];
 
@@ -458,9 +458,9 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t))
    * base
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl
index 3726bf080..0a196d2b9 100644
--- a/OpenCL/m18200_a3-optimized.cl
+++ b/OpenCL/m18200_a3-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; // pointer type only: qualifier must match the cast, no storage is declared
 
   #ifdef _unroll
   #pragma unroll
@@ -675,9 +675,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -727,9 +727,9 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -783,9 +783,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; // pointer to this lid's entry; only rc4_keys[] itself is a storage declaration
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -835,9 +835,9 @@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t))
    * main
    */
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl
index 666774877..b7ce0ec97 100644
--- a/OpenCL/m18200_a3-pure.cl
+++ b/OpenCL/m18200_a3-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -407,9 +407,9 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -480,9 +480,9 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
     w[idx] = pws[gid].i[idx];
   }
 
-  LOCAL_AS RC4_KEY rc4_keys[64];
+  LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18300-pure.cl b/OpenCL/m18300-pure.cl
index 26f593a7b..46e52a1aa 100644
--- a/OpenCL/m18300-pure.cl
+++ b/OpenCL/m18300-pure.cl
@@ -281,17 +281,17 @@ KERNEL_FQ void m18300_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18400-pure.cl b/OpenCL/m18400-pure.cl
index fe5391fb0..b6645e8a9 100644
--- a/OpenCL/m18400-pure.cl
+++ b/OpenCL/m18400-pure.cl
@@ -277,17 +277,17 @@ KERNEL_FQ void m18400_comp (KERN_ATTR_TMPS_ESALT (odf12_tmp_t, odf12_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18500_a0-pure.cl b/OpenCL/m18500_a0-pure.cl
index 415506b0c..922ba1c01 100644
--- a/OpenCL/m18500_a0-pure.cl
+++ b/OpenCL/m18500_a0-pure.cl
@@ -39,7 +39,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -155,7 +155,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_RULES ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18500_a1-pure.cl b/OpenCL/m18500_a1-pure.cl
index 84c725944..3047a0129 100644
--- a/OpenCL/m18500_a1-pure.cl
+++ b/OpenCL/m18500_a1-pure.cl
@@ -39,7 +39,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -154,7 +154,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_RULES ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18500_a3-pure.cl b/OpenCL/m18500_a3-pure.cl
index d758dc5c9..5ef993ac0 100644
--- a/OpenCL/m18500_a3-pure.cl
+++ b/OpenCL/m18500_a3-pure.cl
@@ -37,7 +37,7 @@ KERNEL_FQ void m18500_mxx (KERN_ATTR_VECTOR ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -164,7 +164,7 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_VECTOR ())
   const u64 gid = get_global_id (0);
   const u64 lsz = get_local_size (0);
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index 2f8e0ea18..9d03c013c 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -616,15 +616,15 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18
     P[i] = c_pbox[i] ^ ukey[i % 4];
   }
 
-  LOCAL_AS u32 S0_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S1_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S2_all[FIXED_LOCAL_SIZE][256];
-  LOCAL_AS u32 S3_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
+  LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_AS u32 *S0 = S0_all[lid];
-  LOCAL_AS u32 *S1 = S1_all[lid];
-  LOCAL_AS u32 *S2 = S2_all[lid];
-  LOCAL_AS u32 *S3 = S3_all[lid];
+  LOCAL_VK u32 *S0 = S0_all[lid];
+  LOCAL_VK u32 *S1 = S1_all[lid];
+  LOCAL_VK u32 *S2 = S2_all[lid];
+  LOCAL_VK u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
diff --git a/OpenCL/m18900-pure.cl b/OpenCL/m18900-pure.cl
index e203d4326..0eaa5d64d 100644
--- a/OpenCL/m18900-pure.cl
+++ b/OpenCL/m18900-pure.cl
@@ -245,17 +245,17 @@ KERNEL_FQ void m18900_comp (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19500_a0-pure.cl b/OpenCL/m19500_a0-pure.cl
index 665250791..c9b4cbfad 100644
--- a/OpenCL/m19500_a0-pure.cl
+++ b/OpenCL/m19500_a0-pure.cl
@@ -52,7 +52,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_RULES_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -187,7 +187,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_RULES_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19500_a1-pure.cl b/OpenCL/m19500_a1-pure.cl
index 2a4ba3d16..a76d95d70 100644
--- a/OpenCL/m19500_a1-pure.cl
+++ b/OpenCL/m19500_a1-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -181,7 +181,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19500_a3-pure.cl b/OpenCL/m19500_a3-pure.cl
index 85b752854..50dce1e7d 100644
--- a/OpenCL/m19500_a3-pure.cl
+++ b/OpenCL/m19500_a3-pure.cl
@@ -50,7 +50,7 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
@@ -196,7 +196,7 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t))
    * bin2asc table
    */
 
-  LOCAL_AS u32 l_bin2asc[256];
+  LOCAL_VK u32 l_bin2asc[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19600-pure.cl b/OpenCL/m19600-pure.cl
index 14c62dd42..1403a7c4d 100644
--- a/OpenCL/m19600-pure.cl
+++ b/OpenCL/m19600-pure.cl
@@ -297,17 +297,17 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19700-pure.cl b/OpenCL/m19700-pure.cl
index 2035901d7..4c7d9b466 100644
--- a/OpenCL/m19700-pure.cl
+++ b/OpenCL/m19700-pure.cl
@@ -297,17 +297,17 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19800-pure.cl b/OpenCL/m19800-pure.cl
index 1ec3fb4f0..bcadc5a10 100644
--- a/OpenCL/m19800-pure.cl
+++ b/OpenCL/m19800-pure.cl
@@ -297,17 +297,17 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m19900-pure.cl b/OpenCL/m19900-pure.cl
index 35c0abb22..ed646ead9 100644
--- a/OpenCL/m19900-pure.cl
+++ b/OpenCL/m19900-pure.cl
@@ -298,17 +298,17 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t))
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl
index e6136020e..6257b4c42 100644
--- a/OpenCL/m20011-pure.cl
+++ b/OpenCL/m20011-pure.cl
@@ -334,17 +334,17 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl
index 0fad4fdbf..7bf0534a6 100644
--- a/OpenCL/m20012-pure.cl
+++ b/OpenCL/m20012-pure.cl
@@ -334,17 +334,17 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl
index c711846b5..5e6d04dc7 100644
--- a/OpenCL/m20013-pure.cl
+++ b/OpenCL/m20013-pure.cl
@@ -334,17 +334,17 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
 
   #ifdef REAL_SHM
 
-  LOCAL_AS u32 s_td0[256];
-  LOCAL_AS u32 s_td1[256];
-  LOCAL_AS u32 s_td2[256];
-  LOCAL_AS u32 s_td3[256];
-  LOCAL_AS u32 s_td4[256];
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
 
-  LOCAL_AS u32 s_te0[256];
-  LOCAL_AS u32 s_te1[256];
-  LOCAL_AS u32 s_te2[256];
-  LOCAL_AS u32 s_te3[256];
-  LOCAL_AS u32 s_te4[256];
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
 
   for (u32 i = lid; i < 256; i += lsz)
   {
diff --git a/src/backend.c b/src/backend.c
index 000e826b1..66e19d084 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3090,6 +3090,31 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   if (device_param->is_cuda == true)
   {
+    u64 local_mem_size = 0;
+
+    switch (kern_run)
+    {
+      case KERN_RUN_1:      local_mem_size  = device_param->kernel_local_mem_size1;       break;
+      case KERN_RUN_12:     local_mem_size  = device_param->kernel_local_mem_size12;      break;
+      case KERN_RUN_2:      local_mem_size  = device_param->kernel_local_mem_size2;       break;
+      case KERN_RUN_23:     local_mem_size  = device_param->kernel_local_mem_size23;      break;
+      case KERN_RUN_3:      local_mem_size  = device_param->kernel_local_mem_size3;       break;
+      case KERN_RUN_4:      local_mem_size  = device_param->kernel_local_mem_size4;       break;
+      case KERN_RUN_INIT2:  local_mem_size  = device_param->kernel_local_mem_size_init2;  break;
+      case KERN_RUN_LOOP2:  local_mem_size  = device_param->kernel_local_mem_size_loop2;  break;
+      case KERN_RUN_AUX1:   local_mem_size  = device_param->kernel_local_mem_size_aux1;   break;
+      case KERN_RUN_AUX2:   local_mem_size  = device_param->kernel_local_mem_size_aux2;   break;
+      case KERN_RUN_AUX3:   local_mem_size  = device_param->kernel_local_mem_size_aux3;   break;
+      case KERN_RUN_AUX4:   local_mem_size  = device_param->kernel_local_mem_size_aux4;   break;
+    }
+
+    if (local_mem_size)
+    {
+      const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size;
+
+      kernel_threads = MIN (kernel_threads, max_threads_possible);
+    }
+
     CUfunction cuda_function = NULL;
 
     if (device_param->is_cuda == true)
@@ -5294,6 +5319,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->skipped = true;
       }
 
+      device_param->device_local_mem_size = max_shared_memory_per_block;
+
       // device_max_constant_buffer_size
 
       int device_max_constant_buffer_size = 0;
diff --git a/src/modules/module_03200.c b/src/modules/module_03200.c
index 6fd2ecea9..6cd15c7c7 100644
--- a/src/modules/module_03200.c
+++ b/src/modules/module_03200.c
@@ -104,7 +104,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
       // I did some research on this and it seems to be related with the datatype.
       // For example, if i used u8 instead, there's only 1 byte wasted.
 
-      overhead = 4;
+      if (device_param->is_opencl == true)
+      {
+        overhead = 4;
+      }
+
+      // no clue yet where this is coming from
+
+      if (device_param->is_cuda == true)
+      {
+        overhead = 240;
+      }
     }
 
     if (user_options->kernel_threads_chgd == true)

From 03b2d3fb69eec9791fd61af2bbfcbc251192dcd3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 12:08:54 +0200
Subject: [PATCH 35/73] Fix use of LOCAL_VK and LOCAL_AS in -m 3200

---
 OpenCL/m03200-pure.cl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl
index 92c5775ea..5f701666f 100644
--- a/OpenCL/m03200-pure.cl
+++ b/OpenCL/m03200-pure.cl
@@ -449,10 +449,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   // initstate
 
@@ -618,10 +618,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
@@ -803,10 +803,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {

From 7e5356126c444e943095f82303ff12c3f7bce412 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 12:22:37 +0200
Subject: [PATCH 36/73] Fix more use of LOCAL_VK and LOCAL_AS

---
 OpenCL/m07500_a0-optimized.cl |  6 +++---
 OpenCL/m07500_a0-pure.cl      |  6 +++---
 OpenCL/m07500_a1-optimized.cl |  6 +++---
 OpenCL/m07500_a1-pure.cl      |  6 +++---
 OpenCL/m07500_a3-optimized.cl | 10 +++++-----
 OpenCL/m07500_a3-pure.cl      |  6 +++---
 OpenCL/m09000-pure.cl         | 16 ++++++++--------
 OpenCL/m09700_a0-optimized.cl |  6 +++---
 OpenCL/m09700_a1-optimized.cl |  6 +++---
 OpenCL/m09700_a3-optimized.cl |  6 +++---
 OpenCL/m09710_a0-optimized.cl |  6 +++---
 OpenCL/m09710_a1-optimized.cl |  6 +++---
 OpenCL/m09710_a3-optimized.cl |  6 +++---
 OpenCL/m09800_a0-optimized.cl |  6 +++---
 OpenCL/m09800_a1-optimized.cl |  6 +++---
 OpenCL/m09800_a3-optimized.cl |  6 +++---
 OpenCL/m09810_a0-optimized.cl |  6 +++---
 OpenCL/m09810_a1-optimized.cl |  6 +++---
 OpenCL/m09810_a3-optimized.cl |  6 +++---
 OpenCL/m10400_a0-optimized.cl |  6 +++---
 OpenCL/m10400_a1-optimized.cl |  6 +++---
 OpenCL/m10400_a3-optimized.cl |  6 +++---
 OpenCL/m10410_a0-optimized.cl |  6 +++---
 OpenCL/m10410_a1-optimized.cl |  6 +++---
 OpenCL/m10410_a3-optimized.cl |  6 +++---
 OpenCL/m10500-pure.cl         |  4 ++--
 OpenCL/m13100_a0-optimized.cl |  6 +++---
 OpenCL/m13100_a0-pure.cl      |  6 +++---
 OpenCL/m13100_a1-optimized.cl |  6 +++---
 OpenCL/m13100_a1-pure.cl      |  6 +++---
 OpenCL/m13100_a3-optimized.cl | 10 +++++-----
 OpenCL/m13100_a3-pure.cl      |  6 +++---
 OpenCL/m18200_a0-optimized.cl |  6 +++---
 OpenCL/m18200_a0-pure.cl      |  6 +++---
 OpenCL/m18200_a1-optimized.cl |  6 +++---
 OpenCL/m18200_a1-pure.cl      |  6 +++---
 OpenCL/m18200_a3-optimized.cl | 10 +++++-----
 OpenCL/m18200_a3-pure.cl      |  6 +++---
 OpenCL/m18600-pure.cl         |  8 ++++----
 39 files changed, 128 insertions(+), 128 deletions(-)

diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl
index cffd768b1..60c658a0a 100644
--- a/OpenCL/m07500_a0-optimized.cl
+++ b/OpenCL/m07500_a0-optimized.cl
@@ -50,7 +50,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -453,7 +453,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -559,7 +559,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl
index 7bdf81668..a26f705c1 100644
--- a/OpenCL/m07500_a0-pure.cl
+++ b/OpenCL/m07500_a0-pure.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -298,7 +298,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -369,7 +369,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_RULES_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl
index 4bf1a6a05..ddcc7a51a 100644
--- a/OpenCL/m07500_a1-optimized.cl
+++ b/OpenCL/m07500_a1-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -451,7 +451,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -607,7 +607,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl
index e6d11bf6d..4a812ac9e 100644
--- a/OpenCL/m07500_a1-pure.cl
+++ b/OpenCL/m07500_a1-pure.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -294,7 +294,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -363,7 +363,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl
index f8c6d73b9..68e154486 100644
--- a/OpenCL/m07500_a3-optimized.cl
+++ b/OpenCL/m07500_a3-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -530,7 +530,7 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -582,7 +582,7 @@ KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -638,7 +638,7 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -690,7 +690,7 @@ KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl
index 4e1257620..e6a6c4b01 100644
--- a/OpenCL/m07500_a3-pure.cl
+++ b/OpenCL/m07500_a3-pure.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -315,7 +315,7 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -409,7 +409,7 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl
index 6959ad6b6..591cdc0cf 100644
--- a/OpenCL/m09000-pure.cl
+++ b/OpenCL/m09000-pure.cl
@@ -476,10 +476,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
@@ -607,10 +607,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {
diff --git a/OpenCL/m09700_a0-optimized.cl b/OpenCL/m09700_a0-optimized.cl
index c92fbb483..dbbbf4b55 100644
--- a/OpenCL/m09700_a0-optimized.cl
+++ b/OpenCL/m09700_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -529,7 +529,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -716,7 +716,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09700_a1-optimized.cl b/OpenCL/m09700_a1-optimized.cl
index e404894d0..1c3a59809 100644
--- a/OpenCL/m09700_a1-optimized.cl
+++ b/OpenCL/m09700_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -527,7 +527,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -772,7 +772,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl
index 82d6d308e..032b6f5eb 100644
--- a/OpenCL/m09700_a3-optimized.cl
+++ b/OpenCL/m09700_a3-optimized.cl
@@ -43,7 +43,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -157,7 +157,7 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -563,7 +563,7 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09710_a0-optimized.cl b/OpenCL/m09710_a0-optimized.cl
index a819ab28b..4359393f5 100644
--- a/OpenCL/m09710_a0-optimized.cl
+++ b/OpenCL/m09710_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -185,7 +185,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -323,7 +323,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09710_a1-optimized.cl b/OpenCL/m09710_a1-optimized.cl
index 5bf2afb72..fa905ed7c 100644
--- a/OpenCL/m09710_a1-optimized.cl
+++ b/OpenCL/m09710_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,7 +183,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -367,7 +367,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl
index 9def38990..ae9bd0383 100644
--- a/OpenCL/m09710_a3-optimized.cl
+++ b/OpenCL/m09710_a3-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -160,7 +160,7 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -269,7 +269,7 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl
index 7b5847753..ffa5c1388 100644
--- a/OpenCL/m09800_a0-optimized.cl
+++ b/OpenCL/m09800_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -185,7 +185,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -385,7 +385,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl
index d78f9f799..07bfb15ca 100644
--- a/OpenCL/m09800_a1-optimized.cl
+++ b/OpenCL/m09800_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,7 +183,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -433,7 +433,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl
index 60c77499e..303b177a8 100644
--- a/OpenCL/m09800_a3-optimized.cl
+++ b/OpenCL/m09800_a3-optimized.cl
@@ -43,7 +43,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -157,7 +157,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -325,7 +325,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl
index 7d68831f8..5db2264a6 100644
--- a/OpenCL/m09810_a0-optimized.cl
+++ b/OpenCL/m09810_a0-optimized.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -185,7 +185,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -310,7 +310,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl
index a94225f9f..69ecab5c3 100644
--- a/OpenCL/m09810_a1-optimized.cl
+++ b/OpenCL/m09810_a1-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -183,7 +183,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -354,7 +354,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl
index c4dbe0a54..495adc734 100644
--- a/OpenCL/m09810_a3-optimized.cl
+++ b/OpenCL/m09810_a3-optimized.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -160,7 +160,7 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
@@ -254,7 +254,7 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * esalt
diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl
index 8460b1d5b..55332396a 100644
--- a/OpenCL/m10400_a0-optimized.cl
+++ b/OpenCL/m10400_a0-optimized.cl
@@ -72,7 +72,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -194,7 +194,7 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -370,7 +370,7 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl
index 15337df7f..c10d184ca 100644
--- a/OpenCL/m10400_a1-optimized.cl
+++ b/OpenCL/m10400_a1-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -192,7 +192,7 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -428,7 +428,7 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl
index dcc1e12f6..49eaac162 100644
--- a/OpenCL/m10400_a3-optimized.cl
+++ b/OpenCL/m10400_a3-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -169,7 +169,7 @@ DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -335,7 +335,7 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl
index db2aec224..d91c72621 100644
--- a/OpenCL/m10410_a0-optimized.cl
+++ b/OpenCL/m10410_a0-optimized.cl
@@ -72,7 +72,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -193,7 +193,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t))
    */
 
   LOCAL_VK RC4_KEY rc4_keys[64];
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -265,7 +265,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t))
    */
 
   LOCAL_VK RC4_KEY rc4_keys[64];
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * digest
diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl
index 4ba60e88c..8e9c1fef6 100644
--- a/OpenCL/m10410_a1-optimized.cl
+++ b/OpenCL/m10410_a1-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -192,7 +192,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
@@ -334,7 +334,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * U_buf
diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl
index 30d26f4d9..cef243a5c 100644
--- a/OpenCL/m10410_a3-optimized.cl
+++ b/OpenCL/m10410_a3-optimized.cl
@@ -70,7 +70,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -169,7 +169,7 @@ DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
@@ -212,7 +212,7 @@ DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
    * shared
    */
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * digest
diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl
index 2ba76ba62..51ee73127 100644
--- a/OpenCL/m10500-pure.cl
+++ b/OpenCL/m10500-pure.cl
@@ -76,7 +76,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -379,7 +379,7 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * loop
diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl
index a98b0e95d..b9465f730 100644
--- a/OpenCL/m13100_a0-optimized.cl
+++ b/OpenCL/m13100_a0-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -612,7 +612,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -710,7 +710,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl
index 1ce743e85..7a41226ef 100644
--- a/OpenCL/m13100_a0-pure.cl
+++ b/OpenCL/m13100_a0-pure.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -406,7 +406,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -468,7 +468,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl
index 406980f20..a548a7bf1 100644
--- a/OpenCL/m13100_a1-optimized.cl
+++ b/OpenCL/m13100_a1-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -609,7 +609,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -756,7 +756,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl
index 907a274b1..a8d0098b3 100644
--- a/OpenCL/m13100_a1-pure.cl
+++ b/OpenCL/m13100_a1-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -402,7 +402,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -462,7 +462,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl
index b8c27d187..6079988b0 100644
--- a/OpenCL/m13100_a3-optimized.cl
+++ b/OpenCL/m13100_a3-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -679,7 +679,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -731,7 +731,7 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -787,7 +787,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -839,7 +839,7 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl
index d8751b9b3..25e60e0c5 100644
--- a/OpenCL/m13100_a3-pure.cl
+++ b/OpenCL/m13100_a3-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -411,7 +411,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -484,7 +484,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl
index e980572b3..3791d2058 100644
--- a/OpenCL/m18200_a0-optimized.cl
+++ b/OpenCL/m18200_a0-optimized.cl
@@ -49,7 +49,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -610,7 +610,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -708,7 +708,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl
index 49b1f127b..ba15f1908 100644
--- a/OpenCL/m18200_a0-pure.cl
+++ b/OpenCL/m18200_a0-pure.cl
@@ -48,7 +48,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -404,7 +404,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -466,7 +466,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl
index 1fdfd2d8f..e66367c08 100644
--- a/OpenCL/m18200_a1-optimized.cl
+++ b/OpenCL/m18200_a1-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -607,7 +607,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
@@ -754,7 +754,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   /**
    * salt
diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl
index 08ee938fd..3817e1e8a 100644
--- a/OpenCL/m18200_a1-pure.cl
+++ b/OpenCL/m18200_a1-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -400,7 +400,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -460,7 +460,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl
index 0a196d2b9..dce8dacf6 100644
--- a/OpenCL/m18200_a3-optimized.cl
+++ b/OpenCL/m18200_a3-optimized.cl
@@ -47,7 +47,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -677,7 +677,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -729,7 +729,7 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -785,7 +785,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
@@ -837,7 +837,7 @@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
 }
diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl
index b7ce0ec97..015cd4f36 100644
--- a/OpenCL/m18200_a3-pure.cl
+++ b/OpenCL/m18200_a3-pure.cl
@@ -46,7 +46,7 @@ DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
   u32 v = 0x03020100;
   u32 a = 0x04040404;
 
-  LOCAL_VK u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
+  LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S;
 
   #ifdef _unroll
   #pragma unroll
@@ -409,7 +409,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
@@ -482,7 +482,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
 
   LOCAL_VK RC4_KEY rc4_keys[64];
 
-  LOCAL_VK RC4_KEY *rc4_key = &rc4_keys[lid];
+  LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
 
   u32 checksum[4];
 
diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index 9d03c013c..930b85f59 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -621,10 +621,10 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18
   LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
   LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
 
-  LOCAL_VK u32 *S0 = S0_all[lid];
-  LOCAL_VK u32 *S1 = S1_all[lid];
-  LOCAL_VK u32 *S2 = S2_all[lid];
-  LOCAL_VK u32 *S3 = S3_all[lid];
+  LOCAL_AS u32 *S0 = S0_all[lid];
+  LOCAL_AS u32 *S1 = S1_all[lid];
+  LOCAL_AS u32 *S2 = S2_all[lid];
+  LOCAL_AS u32 *S3 = S3_all[lid];
 
   for (u32 i = 0; i < 256; i++)
   {

From a7d04adba3d713fe453dd65c06fb51ab9dbd0ebc Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 14:17:29 +0200
Subject: [PATCH 37/73] Fix opencl_devices_active and backend_devices_active

---
 src/backend.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 66e19d084..313e6c948 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -76,7 +76,14 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param_dst->is_opencl == true)
       {
-        device_param_dst->skipped = true;
+        if (device_param_dst->skipped == false)
+        {
+          device_param_dst->skipped = true;
+
+          backend_ctx->opencl_devices_active--;
+
+          backend_ctx->backend_devices_active--;
+        }
       }
     }
   }
@@ -6381,6 +6388,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
   backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
 
+  // find duplicate devices (typically the same device exposed via both cuda and opencl!)
+  // skipping this check when force is set would leave both devices enabled, which is the worst possible outcome
+  // many users set force by default, so the check is now always run regardless of force
+
+  //if (user_options->force == false)
+  //{
+  backend_ctx_find_alias_devices (hashcat_ctx);
+  //}
+
   if (backend_ctx->backend_devices_active == 0)
   {
     event_log_error (hashcat_ctx, "No devices found/left.");
@@ -6388,13 +6404,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     return -1;
   }
 
-  // find duplicate devices (typically cuda and opencl!)
-
-  if (user_options->force == false)
-  {
-    backend_ctx_find_alias_devices (hashcat_ctx);
-  }
-
   // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
   if (backend_ctx->backend_devices_filter != (u64) -1)

From 54dd2ea300c8f5e2c32aed75da5badc48fc354fd Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 16:07:28 +0200
Subject: [PATCH 38/73] Use same settings for vector datatypes in inc_types.h
 as seen in cuda SDK vector_types.h

---
 OpenCL/inc_types.h | 85 ++++++++++++----------------------------------
 1 file changed, 21 insertions(+), 64 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index d1c1f4498..7ee661e6e 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -56,11 +56,8 @@ typedef u64  u64x;
 
 #if VECT_SIZE == 2
 
-class u8x
+struct __device_builtin__ __builtin_align__(2) u8x
 {
-  private:
-  public:
-
   u8 s0;
   u8 s1;
 
@@ -71,11 +68,8 @@ class u8x
   inline __device__ ~u8x (void) { }
 };
 
-class u16x
+struct __device_builtin__ __builtin_align__(4) u16x
 {
-  private:
-  public:
-
   u16 s0;
   u16 s1;
 
@@ -86,11 +80,8 @@ class u16x
   inline __device__ ~u16x (void) { }
 };
 
-class u32x
+struct __device_builtin__ __builtin_align__(8) u32x
 {
-  private:
-  public:
-
   u32 s0;
   u32 s1;
 
@@ -101,11 +92,8 @@ class u32x
   inline __device__ ~u32x (void) { }
 };
 
-class u64x
+struct __device_builtin__ __builtin_align__(16) u64x
 {
-  private:
-  public:
-
   u64 s0;
   u64 s1;
 
@@ -208,11 +196,8 @@ inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1);
 
 #if VECT_SIZE == 4
 
-class u8x
+struct __device_builtin__ __builtin_align__(4) u8x
 {
-  private:
-  public:
-
   u8 s0;
   u8 s1;
   u8 s2;
@@ -225,11 +210,8 @@ class u8x
   inline __device__ ~u8x (void) { }
 };
 
-class u16x
+struct __device_builtin__ __builtin_align__(8) u16x
 {
-  private:
-  public:
-
   u16 s0;
   u16 s1;
   u16 s2;
@@ -242,11 +224,8 @@ class u16x
   inline __device__ ~u16x (void) { }
 };
 
-class u32x
+struct __device_builtin__ __builtin_align__(16) u32x
 {
-  private:
-  public:
-
   u32 s0;
   u32 s1;
   u32 s2;
@@ -259,11 +238,8 @@ class u32x
   inline __device__ ~u32x (void) { }
 };
 
-class u64x
+struct __device_builtin__ __builtin_align__(32) u64x
 {
-  private:
-  public:
-
   u64 s0;
   u64 s1;
   u64 s2;
@@ -368,11 +344,8 @@ inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~
 
 #if VECT_SIZE == 8
 
-class u8x
+struct __device_builtin__ __builtin_align__(8) u8x
 {
-  private:
-  public:
-
   u8 s0;
   u8 s1;
   u8 s2;
@@ -389,11 +362,8 @@ class u8x
   inline __device__ ~u8x (void) { }
 };
 
-class u16x
+struct __device_builtin__ __builtin_align__(16) u16x
 {
-  private:
-  public:
-
   u16 s0;
   u16 s1;
   u16 s2;
@@ -410,11 +380,8 @@ class u16x
   inline __device__ ~u16x (void) { }
 };
 
-class u32x
+struct __device_builtin__ __builtin_align__(32) u32x
 {
-  private:
-  public:
-
   u32 s0;
   u32 s1;
   u32 s2;
@@ -431,11 +398,8 @@ class u32x
   inline __device__ ~u32x (void) { }
 };
 
-class u64x
+struct __device_builtin__ __builtin_align__(64) u64x
 {
-  private:
-  public:
-
   u64 s0;
   u64 s1;
   u64 s2;
@@ -544,11 +508,8 @@ inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~
 
 #if VECT_SIZE == 16
 
-class u8x
+struct __device_builtin__ __builtin_align__(16) u8x
 {
-  private:
-  public:
-
   u8 s0;
   u8 s1;
   u8 s2;
@@ -573,11 +534,8 @@ class u8x
   inline __device__ ~u8x (void) { }
 };
 
-class u16x
+struct __device_builtin__ __builtin_align__(32) u16x
 {
-  private:
-  public:
-
   u16 s0;
   u16 s1;
   u16 s2;
@@ -602,11 +560,8 @@ class u16x
   inline __device__ ~u16x (void) { }
 };
 
-class u32x
+struct __device_builtin__ __builtin_align__(64) u32x
 {
-  private:
-  public:
-
   u32 s0;
   u32 s1;
   u32 s2;
@@ -631,11 +586,8 @@ class u32x
   inline __device__ ~u32x (void) { }
 };
 
-class u64x
+struct __device_builtin__ __builtin_align__(128) u64x
 {
-  private:
-  public:
-
   u64 s0;
   u64 s1;
   u64 s2;
@@ -750,6 +702,11 @@ inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~
 
 #endif
 
+typedef __device_builtin__ struct u8x  u8x;
+typedef __device_builtin__ struct u16x u16x;
+typedef __device_builtin__ struct u32x u32x;
+typedef __device_builtin__ struct u64x u64x;
+
 #else
 typedef VTYPE(uchar,  VECT_SIZE)  u8x;
 typedef VTYPE(ushort, VECT_SIZE) u16x;

From 363e789b89b9e84d66f15c0caa1e1dc31aa54ed0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 7 May 2019 16:52:08 +0200
Subject: [PATCH 39/73] Assume local nvrtc.dll and cuda.dll on windows

---
 src/backend.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 313e6c948..a1d75c8a6 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -683,13 +683,11 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
   memset (nvrtc, 0, sizeof (NVRTC_PTR));
 
   #if   defined (_WIN)
-  nvrtc->lib = hc_dlopen ("c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1\\nvrtc.lib");
+  nvrtc->lib = hc_dlopen ("nvrtc.dll");
   #elif defined (__APPLE__)
   nvrtc->lib = hc_dlopen ("/System/Library/Frameworks/NVRTC.framework/NVRTC");
   #elif defined (__CYGWIN__)
   nvrtc->lib = hc_dlopen ("nvrtc.dll");
-
-  if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("cygnvrtc-1.dll");
   #else
   nvrtc->lib = hc_dlopen ("libnvrtc.so");
 
@@ -868,13 +866,11 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
   memset (cuda, 0, sizeof (CUDA_PTR));
 
   #if   defined (_WIN)
-  cuda->lib = hc_dlopen ("c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1\\cuda.lib");
+  cuda->lib = hc_dlopen ("cuda.dll");
   #elif defined (__APPLE__)
   cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
   #elif defined (__CYGWIN__)
   cuda->lib = hc_dlopen ("cuda.dll");
-
-  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("cygcuda-1.dll");
   #else
   cuda->lib = hc_dlopen ("libcuda.so");
 

From 6b7d064118b34c249b1db506f2b7c340e6cb18ea Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 8 May 2019 15:21:22 +0200
Subject: [PATCH 40/73] Replace (u32x) (...) with make_u32x (...)

---
 OpenCL/inc_cipher_des.h        |  8 ++---
 OpenCL/inc_common.cl           | 32 ++++++++---------
 OpenCL/inc_hash_streebog256.cl |  4 +--
 OpenCL/inc_hash_streebog256.h  |  8 ++---
 OpenCL/inc_hash_streebog512.cl |  4 +--
 OpenCL/inc_hash_streebog512.h  |  8 ++---
 OpenCL/inc_hash_whirlpool.h    |  8 ++---
 OpenCL/inc_simd.cl             | 30 ++++++++--------
 OpenCL/inc_simd.h              | 40 ++++++++++-----------
 OpenCL/inc_types.h             | 17 +++++++++
 OpenCL/m01500_a0-pure.cl       |  8 ++---
 OpenCL/m01500_a1-pure.cl       |  8 ++---
 OpenCL/m01750_a0-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m01750_a1-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m01750_a3-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m01760_a0-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m01760_a1-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m01760_a3-optimized.cl  | 64 +++++++++++++++++-----------------
 OpenCL/m02610_a0-optimized.cl  | 10 +++---
 OpenCL/m02610_a0-pure.cl       | 10 +++---
 OpenCL/m02610_a1-optimized.cl  | 10 +++---
 OpenCL/m02610_a1-pure.cl       | 10 +++---
 OpenCL/m02610_a3-optimized.cl  | 10 +++---
 OpenCL/m02610_a3-pure.cl       | 10 +++---
 OpenCL/m02710_a0-optimized.cl  | 10 +++---
 OpenCL/m02710_a1-optimized.cl  | 10 +++---
 OpenCL/m02710_a3-optimized.cl  | 10 +++---
 OpenCL/m02810_a0-optimized.cl  | 10 +++---
 OpenCL/m02810_a0-pure.cl       | 10 +++---
 OpenCL/m02810_a1-optimized.cl  | 10 +++---
 OpenCL/m02810_a1-pure.cl       | 10 +++---
 OpenCL/m02810_a3-optimized.cl  | 10 +++---
 OpenCL/m02810_a3-pure.cl       | 10 +++---
 OpenCL/m03000_a0-pure.cl       |  8 ++---
 OpenCL/m03000_a1-pure.cl       |  8 ++---
 OpenCL/m03710_a0-optimized.cl  | 10 +++---
 OpenCL/m03710_a0-pure.cl       | 10 +++---
 OpenCL/m03710_a1-optimized.cl  | 10 +++---
 OpenCL/m03710_a1-pure.cl       | 10 +++---
 OpenCL/m03710_a3-optimized.cl  | 10 +++---
 OpenCL/m03710_a3-pure.cl       | 10 +++---
 OpenCL/m03910_a0-optimized.cl  | 10 +++---
 OpenCL/m03910_a0-pure.cl       | 10 +++---
 OpenCL/m03910_a1-optimized.cl  | 10 +++---
 OpenCL/m03910_a1-pure.cl       | 10 +++---
 OpenCL/m03910_a3-optimized.cl  | 10 +++---
 OpenCL/m03910_a3-pure.cl       | 10 +++---
 OpenCL/m04010_a0-optimized.cl  | 10 +++---
 OpenCL/m04010_a0-pure.cl       | 10 +++---
 OpenCL/m04010_a1-optimized.cl  | 10 +++---
 OpenCL/m04010_a1-pure.cl       | 10 +++---
 OpenCL/m04010_a3-optimized.cl  | 10 +++---
 OpenCL/m04010_a3-pure.cl       | 10 +++---
 OpenCL/m04110_a0-optimized.cl  | 10 +++---
 OpenCL/m04110_a0-pure.cl       | 10 +++---
 OpenCL/m04110_a1-optimized.cl  | 10 +++---
 OpenCL/m04110_a1-pure.cl       | 10 +++---
 OpenCL/m04110_a3-optimized.cl  | 10 +++---
 OpenCL/m04110_a3-pure.cl       | 10 +++---
 OpenCL/m04310_a0-optimized.cl  | 10 +++---
 OpenCL/m04310_a0-pure.cl       | 10 +++---
 OpenCL/m04310_a1-optimized.cl  | 10 +++---
 OpenCL/m04310_a1-pure.cl       | 10 +++---
 OpenCL/m04310_a3-optimized.cl  | 10 +++---
 OpenCL/m04310_a3-pure.cl       | 10 +++---
 OpenCL/m04400_a0-optimized.cl  | 10 +++---
 OpenCL/m04400_a0-pure.cl       | 10 +++---
 OpenCL/m04400_a1-optimized.cl  | 10 +++---
 OpenCL/m04400_a1-pure.cl       | 10 +++---
 OpenCL/m04400_a3-optimized.cl  | 10 +++---
 OpenCL/m04400_a3-pure.cl       | 10 +++---
 OpenCL/m04500_a0-optimized.cl  | 10 +++---
 OpenCL/m04500_a0-pure.cl       | 10 +++---
 OpenCL/m04500_a1-optimized.cl  | 10 +++---
 OpenCL/m04500_a1-pure.cl       | 10 +++---
 OpenCL/m04500_a3-optimized.cl  | 10 +++---
 OpenCL/m04500_a3-pure.cl       | 10 +++---
 OpenCL/m04520_a0-optimized.cl  | 10 +++---
 OpenCL/m04520_a0-pure.cl       | 10 +++---
 OpenCL/m04520_a1-optimized.cl  | 10 +++---
 OpenCL/m04520_a1-pure.cl       | 10 +++---
 OpenCL/m04520_a3-optimized.cl  | 10 +++---
 OpenCL/m04520_a3-pure.cl       | 10 +++---
 OpenCL/m04700_a0-optimized.cl  | 10 +++---
 OpenCL/m04700_a0-pure.cl       | 10 +++---
 OpenCL/m04700_a1-optimized.cl  | 10 +++---
 OpenCL/m04700_a1-pure.cl       | 10 +++---
 OpenCL/m04700_a3-optimized.cl  | 10 +++---
 OpenCL/m04700_a3-pure.cl       | 10 +++---
 OpenCL/m04800_a1-optimized.cl  | 10 +++---
 OpenCL/m05500_a0-optimized.cl  |  8 ++---
 OpenCL/m05500_a0-pure.cl       |  8 ++---
 OpenCL/m05500_a1-optimized.cl  |  8 ++---
 OpenCL/m05500_a1-pure.cl       |  8 ++---
 OpenCL/m05500_a3-optimized.cl  |  8 ++---
 OpenCL/m05500_a3-pure.cl       |  8 ++---
 OpenCL/m06900_a0-optimized.cl  |  8 ++---
 OpenCL/m06900_a1-optimized.cl  |  8 ++---
 OpenCL/m06900_a3-optimized.cl  |  8 ++---
 OpenCL/m08400_a0-optimized.cl  | 10 +++---
 OpenCL/m08400_a0-pure.cl       | 10 +++---
 OpenCL/m08400_a1-optimized.cl  | 10 +++---
 OpenCL/m08400_a1-pure.cl       | 10 +++---
 OpenCL/m08400_a3-optimized.cl  | 10 +++---
 OpenCL/m08400_a3-pure.cl       | 10 +++---
 OpenCL/m08500_a0-pure.cl       | 16 ++++-----
 OpenCL/m08500_a1-pure.cl       | 16 ++++-----
 OpenCL/m08500_a3-pure.cl       | 16 ++++-----
 OpenCL/m08600_a0-pure.cl       |  8 ++---
 OpenCL/m08600_a1-pure.cl       |  8 ++---
 OpenCL/m08600_a3-pure.cl       |  8 ++---
 OpenCL/m08700_a0-optimized.cl  | 18 +++++-----
 OpenCL/m08700_a1-optimized.cl  | 18 +++++-----
 OpenCL/m08700_a3-optimized.cl  | 18 +++++-----
 OpenCL/m11100_a0-optimized.cl  | 10 +++---
 OpenCL/m11100_a0-pure.cl       | 10 +++---
 OpenCL/m11100_a1-optimized.cl  | 10 +++---
 OpenCL/m11100_a1-pure.cl       | 10 +++---
 OpenCL/m11100_a3-optimized.cl  | 10 +++---
 OpenCL/m11100_a3-pure.cl       | 10 +++---
 OpenCL/m11200_a3-pure.cl       | 10 +++---
 OpenCL/m11400_a0-pure.cl       | 10 +++---
 OpenCL/m11400_a1-pure.cl       | 10 +++---
 OpenCL/m11400_a3-pure.cl       | 10 +++---
 OpenCL/m11500_a0-optimized.cl  | 10 +++---
 OpenCL/m11500_a1-optimized.cl  | 10 +++---
 OpenCL/m11500_a3-optimized.cl  | 10 +++---
 OpenCL/m12600_a0-optimized.cl  | 10 +++---
 OpenCL/m12600_a0-pure.cl       | 10 +++---
 OpenCL/m12600_a1-optimized.cl  | 10 +++---
 OpenCL/m12600_a1-pure.cl       | 10 +++---
 OpenCL/m12600_a3-optimized.cl  | 10 +++---
 OpenCL/m12600_a3-pure.cl       | 10 +++---
 OpenCL/m13900_a0-optimized.cl  | 10 +++---
 OpenCL/m13900_a0-pure.cl       | 10 +++---
 OpenCL/m13900_a1-optimized.cl  | 10 +++---
 OpenCL/m13900_a1-pure.cl       | 10 +++---
 OpenCL/m13900_a3-optimized.cl  | 10 +++---
 OpenCL/m13900_a3-pure.cl       | 10 +++---
 OpenCL/m14000_a0-pure.cl       | 16 ++++-----
 OpenCL/m14000_a1-pure.cl       | 16 ++++-----
 OpenCL/m14100_a0-pure.cl       | 16 ++++-----
 OpenCL/m14100_a1-pure.cl       | 16 ++++-----
 OpenCL/m14100_a3-pure.cl       | 16 ++++-----
 OpenCL/m14400_a0-optimized.cl  | 10 +++---
 OpenCL/m14400_a0-pure.cl       | 10 +++---
 OpenCL/m14400_a1-optimized.cl  | 10 +++---
 OpenCL/m14400_a1-pure.cl       | 10 +++---
 OpenCL/m14400_a3-optimized.cl  | 10 +++---
 OpenCL/m14400_a3-pure.cl       | 10 +++---
 OpenCL/m16000_a0-pure.cl       |  8 ++---
 OpenCL/m16000_a1-pure.cl       |  8 ++---
 OpenCL/m16000_a3-pure.cl       |  8 ++---
 OpenCL/m18500_a0-pure.cl       | 10 +++---
 OpenCL/m18500_a1-pure.cl       | 10 +++---
 OpenCL/m18500_a3-pure.cl       | 10 +++---
 OpenCL/m19500_a0-pure.cl       | 10 +++---
 OpenCL/m19500_a1-pure.cl       | 10 +++---
 OpenCL/m19500_a3-pure.cl       | 10 +++---
 159 files changed, 1012 insertions(+), 995 deletions(-)

diff --git a/OpenCL/inc_cipher_des.h b/OpenCL/inc_cipher_des.h
index d1bc516e7..c93ff422c 100644
--- a/OpenCL/inc_cipher_des.h
+++ b/OpenCL/inc_cipher_des.h
@@ -92,13 +92,13 @@
 #if   VECT_SIZE == 1
 #define DES_BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define DES_BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define DES_BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define DES_BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define DES_BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *out, const u32 *in, const u32 *Kc, const u32 *Kd, SHM_TYPE u32 (*s_SPtrans)[64]);
diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 68fdf0b88..9e1831d2f 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -310,7 +310,7 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
   #elif defined IS_CUDA
   return rotl32 (a, n);
   #else
-  return rotate (a, (u32x) (n));
+  return rotate (a, make_u32x (n));
   #endif
 }
 
@@ -321,7 +321,7 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
   #elif defined IS_CUDA
   return rotr32 (a, n);
   #else
-  return rotate (a, (u32x) (32 - n));
+  return rotate (a, make_u32x (32 - n));
   #endif
 }
 
@@ -354,7 +354,7 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
   #elif defined IS_CUDA
   return rotl64 (a, n);
   #else
-  return rotate (a, (u64x) (n));
+  return rotate (a, make_u64x (n));
   #endif
 }
 
@@ -365,7 +365,7 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
   #elif defined IS_CUDA
   return rotr64 (a, n);
   #else
-  return rotate (a, (u64x) (64 - n));
+  return rotate (a, make_u64x (64 - n));
   #endif
 }
 
@@ -471,9 +471,9 @@ DECLSPEC u32x hc_swap32 (const u32x v)
   #endif
 
   #else
-  r = bitselect (rotate (v, (u32x) (24)),
-                 rotate (v, (u32x) ( 8)),
-                            (u32x) (0x00ff00ff));
+  r = bitselect (rotate (v, make_u32x (24)),
+                 rotate (v, make_u32x ( 8)),
+                            make_u32x (0x00ff00ff));
   #endif
   #endif
 
@@ -689,13 +689,13 @@ DECLSPEC u64x hc_swap64 (const u64x v)
   #endif
 
   #else
-  r = bitselect (bitselect (rotate (v, (u64x) (24)),
-                            rotate (v, (u64x) ( 8)),
-                                       (u64x) (0x000000ff000000ff)),
-                 bitselect (rotate (v, (u64x) (56)),
-                            rotate (v, (u64x) (40)),
-                                       (u64x) (0x00ff000000ff0000)),
-                                       (u64x) (0xffff0000ffff0000));
+  r = bitselect (bitselect (rotate (v, make_u64x (24)),
+                            rotate (v, make_u64x ( 8)),
+                                       make_u64x (0x000000ff000000ff)),
+                 bitselect (rotate (v, make_u64x (56)),
+                            rotate (v, make_u64x (40)),
+                                       make_u64x (0x00ff000000ff0000)),
+                                       make_u64x (0xffff0000ffff0000));
   #endif
   #endif
 
@@ -747,7 +747,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
 
 DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
 {
-  #define BIT(x)      ((u32x) (1u) << (x))
+  #define BIT(x)      (make_u32x (1u) << (x))
   #define BIT_MASK(x) (BIT (x) - 1)
   #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))
 
@@ -1181,7 +1181,7 @@ DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c)
 
 DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
 {
-  #define BIT(x)      ((u32x) (1u) << (x))
+  #define BIT(x)      (make_u32x (1u) << (x))
   #define BIT_MASK(x) (BIT (x) - 1)
   #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))
 
diff --git a/OpenCL/inc_hash_streebog256.cl b/OpenCL/inc_hash_streebog256.cl
index d97926000..92534db1d 100644
--- a/OpenCL/inc_hash_streebog256.cl
+++ b/OpenCL/inc_hash_streebog256.cl
@@ -1363,7 +1363,7 @@ DECLSPEC void streebog256_add_vector (u64x *x, const u64x *y)
     const u64x right = hc_swap64 (y[i]);
     const u64x sum   = left + right + carry;
 
-    carry = (sum < left) ? (u64x) 1 : (u64x) 0;
+    carry = (sum < left) ? make_u64x (1) : make_u64x (0);
 
     x[i] = hc_swap64 (sum);
   }
@@ -1711,7 +1711,7 @@ DECLSPEC void streebog256_final_vector (streebog256_ctx_vector_t *ctx)
   streebog256_g_vector (ctx->h, ctx->n, m, ctx->s_sbob_sl64);
 
   u64x sizebuf[8] = { 0 };
-  sizebuf[7] = hc_swap64 ((u64x) (pos << 3));
+  sizebuf[7] = hc_swap64 (make_u64x (pos << 3));
 
   streebog256_add_vector (ctx->n, sizebuf);
 
diff --git a/OpenCL/inc_hash_streebog256.h b/OpenCL/inc_hash_streebog256.h
index 1e593c3f2..71e87cf96 100644
--- a/OpenCL/inc_hash_streebog256.h
+++ b/OpenCL/inc_hash_streebog256.h
@@ -10,17 +10,17 @@
 #define BOX(S,n,i)        ((S)[(n)][(i)])
 
 #elif VECT_SIZE == 2
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 
 #elif VECT_SIZE == 4
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 
 #elif VECT_SIZE == 8
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
                            (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 
 #elif VECT_SIZE == 16
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
                            (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], \
                            (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], \
                            (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
diff --git a/OpenCL/inc_hash_streebog512.cl b/OpenCL/inc_hash_streebog512.cl
index fe86b1822..08580981d 100644
--- a/OpenCL/inc_hash_streebog512.cl
+++ b/OpenCL/inc_hash_streebog512.cl
@@ -1382,7 +1382,7 @@ DECLSPEC void streebog512_add_vector (u64x *x, const u64x *y)
     const u64x right = hc_swap64 (y[i]);
     const u64x sum   = left + right + carry;
 
-    carry = (sum < left) ? (u64x) 1 : (u64x) 0;
+    carry = (sum < left) ? make_u64x (1) : make_u64x (0);
 
     x[i] = hc_swap64 (sum);
   }
@@ -1730,7 +1730,7 @@ DECLSPEC void streebog512_final_vector (streebog512_ctx_vector_t *ctx)
   streebog512_g_vector (ctx->h, ctx->n, m, ctx->s_sbob_sl64);
 
   u64x sizebuf[8] = { 0 };
-  sizebuf[7] = hc_swap64 ((u64x) (pos << 3));
+  sizebuf[7] = hc_swap64 (make_u64x (pos << 3));
 
   streebog512_add_vector (ctx->n, sizebuf);
 
diff --git a/OpenCL/inc_hash_streebog512.h b/OpenCL/inc_hash_streebog512.h
index a11644ca2..4181674e8 100644
--- a/OpenCL/inc_hash_streebog512.h
+++ b/OpenCL/inc_hash_streebog512.h
@@ -10,17 +10,17 @@
 #define BOX(S,n,i)        ((S)[(n)][(i)])
 
 #elif VECT_SIZE == 2
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 
 #elif VECT_SIZE == 4
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 
 #elif VECT_SIZE == 8
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
                            (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 
 #elif VECT_SIZE == 16
-#define BOX(S,n,i) (u64x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
+#define BOX(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], \
                            (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], \
                            (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], \
                            (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
diff --git a/OpenCL/inc_hash_whirlpool.h b/OpenCL/inc_hash_whirlpool.h
index 5f25fbdc4..882bdd75b 100644
--- a/OpenCL/inc_hash_whirlpool.h
+++ b/OpenCL/inc_hash_whirlpool.h
@@ -11,13 +11,13 @@
 #if   VECT_SIZE == 1
 #define BOX(S,n,i) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(S,n,i) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(S,n,i) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(S,n,i) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(S,n,i) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #define BOX_S(S,n,i) (S)[(n)][(i)]
diff --git a/OpenCL/inc_simd.cl b/OpenCL/inc_simd.cl
index 50bafeb68..329270140 100644
--- a/OpenCL/inc_simd.cl
+++ b/OpenCL/inc_simd.cl
@@ -14,15 +14,15 @@
 DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
 {
   #if   VECT_SIZE == 1
-  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i);
+  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i);
   #elif VECT_SIZE == 2
-  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
+  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
   #elif VECT_SIZE == 4
-  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
+  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
   #elif VECT_SIZE == 8
-  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
+  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
   #elif VECT_SIZE == 16
-  const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
+  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
   #endif
 
   return ix;
@@ -33,15 +33,15 @@ DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
 DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos)
 {
   #if   VECT_SIZE == 1
-  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len);
+  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len);
   #elif VECT_SIZE == 2
-  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
+  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
   #elif VECT_SIZE == 4
-  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
+  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
   #elif VECT_SIZE == 8
-  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
+  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
   #elif VECT_SIZE == 16
-  const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
+  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
   #endif
 
   return pw_lenx;
@@ -50,15 +50,15 @@ DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il
 DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx)
 {
   #if   VECT_SIZE == 1
-  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx]);
+  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx]);
   #elif VECT_SIZE == 2
-  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
+  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
   #elif VECT_SIZE == 4
-  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
+  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
   #elif VECT_SIZE == 8
-  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
+  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
   #elif VECT_SIZE == 16
-  const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
+  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
   #endif
 
   return ix;
diff --git a/OpenCL/inc_simd.h b/OpenCL/inc_simd.h
index f8e87e4d2..4d0d359cf 100644
--- a/OpenCL/inc_simd.h
+++ b/OpenCL/inc_simd.h
@@ -1050,51 +1050,51 @@
 #define MATCHES_NONE_VS(a,b) !(MATCHES_ONE_VS ((a), (b)))
 
 #if   VECT_SIZE == 1
-#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) *  1) + 0].var[(idx)])
+#define packv(arr,var,gid,idx) make_u32x ((arr)[((gid) *  1) + 0].var[(idx)]) /* packv (one definition per VECT_SIZE): lane k of the u32x result is arr[gid * VECT_SIZE + k].var[idx] */
 #elif VECT_SIZE == 2
-#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) *  2) + 0].var[(idx)], (arr)[((gid) *  2) + 1].var[(idx)])
+#define packv(arr,var,gid,idx) make_u32x ((arr)[((gid) *  2) + 0].var[(idx)], (arr)[((gid) *  2) + 1].var[(idx)])
 #elif VECT_SIZE == 4
-#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) *  4) + 0].var[(idx)], (arr)[((gid) *  4) + 1].var[(idx)], (arr)[((gid) *  4) + 2].var[(idx)], (arr)[((gid) *  4) + 3].var[(idx)])
+#define packv(arr,var,gid,idx) make_u32x ((arr)[((gid) *  4) + 0].var[(idx)], (arr)[((gid) *  4) + 1].var[(idx)], (arr)[((gid) *  4) + 2].var[(idx)], (arr)[((gid) *  4) + 3].var[(idx)])
 #elif VECT_SIZE == 8
-#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) *  8) + 0].var[(idx)], (arr)[((gid) *  8) + 1].var[(idx)], (arr)[((gid) *  8) + 2].var[(idx)], (arr)[((gid) *  8) + 3].var[(idx)], (arr)[((gid) *  8) + 4].var[(idx)], (arr)[((gid) *  8) + 5].var[(idx)], (arr)[((gid) *  8) + 6].var[(idx)], (arr)[((gid) *  8) + 7].var[(idx)])
+#define packv(arr,var,gid,idx) make_u32x ((arr)[((gid) *  8) + 0].var[(idx)], (arr)[((gid) *  8) + 1].var[(idx)], (arr)[((gid) *  8) + 2].var[(idx)], (arr)[((gid) *  8) + 3].var[(idx)], (arr)[((gid) *  8) + 4].var[(idx)], (arr)[((gid) *  8) + 5].var[(idx)], (arr)[((gid) *  8) + 6].var[(idx)], (arr)[((gid) *  8) + 7].var[(idx)])
 #elif VECT_SIZE == 16
-#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
+#define packv(arr,var,gid,idx) make_u32x ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
 #endif
 
 #if   VECT_SIZE == 1
-#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) *  1) + 0].var[(idx)])
+#define pack64v(arr,var,gid,idx) make_u64x ((arr)[((gid) *  1) + 0].var[(idx)]) /* pack64v (one definition per VECT_SIZE): lane k of the u64x result is arr[gid * VECT_SIZE + k].var[idx] */
 #elif VECT_SIZE == 2
-#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) *  2) + 0].var[(idx)], (arr)[((gid) *  2) + 1].var[(idx)])
+#define pack64v(arr,var,gid,idx) make_u64x ((arr)[((gid) *  2) + 0].var[(idx)], (arr)[((gid) *  2) + 1].var[(idx)])
 #elif VECT_SIZE == 4
-#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) *  4) + 0].var[(idx)], (arr)[((gid) *  4) + 1].var[(idx)], (arr)[((gid) *  4) + 2].var[(idx)], (arr)[((gid) *  4) + 3].var[(idx)])
+#define pack64v(arr,var,gid,idx) make_u64x ((arr)[((gid) *  4) + 0].var[(idx)], (arr)[((gid) *  4) + 1].var[(idx)], (arr)[((gid) *  4) + 2].var[(idx)], (arr)[((gid) *  4) + 3].var[(idx)])
 #elif VECT_SIZE == 8
-#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) *  8) + 0].var[(idx)], (arr)[((gid) *  8) + 1].var[(idx)], (arr)[((gid) *  8) + 2].var[(idx)], (arr)[((gid) *  8) + 3].var[(idx)], (arr)[((gid) *  8) + 4].var[(idx)], (arr)[((gid) *  8) + 5].var[(idx)], (arr)[((gid) *  8) + 6].var[(idx)], (arr)[((gid) *  8) + 7].var[(idx)])
+#define pack64v(arr,var,gid,idx) make_u64x ((arr)[((gid) *  8) + 0].var[(idx)], (arr)[((gid) *  8) + 1].var[(idx)], (arr)[((gid) *  8) + 2].var[(idx)], (arr)[((gid) *  8) + 3].var[(idx)], (arr)[((gid) *  8) + 4].var[(idx)], (arr)[((gid) *  8) + 5].var[(idx)], (arr)[((gid) *  8) + 6].var[(idx)], (arr)[((gid) *  8) + 7].var[(idx)])
 #elif VECT_SIZE == 16
-#define pack64v(arr,var,gid,idx) (u64x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
+#define pack64v(arr,var,gid,idx) make_u64x ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)])
 #endif
 
 #if   VECT_SIZE == 1
-#define packvf(arr,var,gid) (u32x) ((arr)[((gid) *  1) + 0].var)
+#define packvf(arr,var,gid) make_u32x ((arr)[((gid) *  1) + 0].var) /* packvf (one definition per VECT_SIZE): lane k of the u32x result is arr[gid * VECT_SIZE + k].var; unlike packv the field is read directly, without an index */
 #elif VECT_SIZE == 2
-#define packvf(arr,var,gid) (u32x) ((arr)[((gid) *  2) + 0].var, (arr)[((gid) *  2) + 1].var)
+#define packvf(arr,var,gid) make_u32x ((arr)[((gid) *  2) + 0].var, (arr)[((gid) *  2) + 1].var)
 #elif VECT_SIZE == 4
-#define packvf(arr,var,gid) (u32x) ((arr)[((gid) *  4) + 0].var, (arr)[((gid) *  4) + 1].var, (arr)[((gid) *  4) + 2].var, (arr)[((gid) *  4) + 3].var)
+#define packvf(arr,var,gid) make_u32x ((arr)[((gid) *  4) + 0].var, (arr)[((gid) *  4) + 1].var, (arr)[((gid) *  4) + 2].var, (arr)[((gid) *  4) + 3].var)
 #elif VECT_SIZE == 8
-#define packvf(arr,var,gid) (u32x) ((arr)[((gid) *  8) + 0].var, (arr)[((gid) *  8) + 1].var, (arr)[((gid) *  8) + 2].var, (arr)[((gid) *  8) + 3].var, (arr)[((gid) *  8) + 4].var, (arr)[((gid) *  8) + 5].var, (arr)[((gid) *  8) + 6].var, (arr)[((gid) *  8) + 7].var)
+#define packvf(arr,var,gid) make_u32x ((arr)[((gid) *  8) + 0].var, (arr)[((gid) *  8) + 1].var, (arr)[((gid) *  8) + 2].var, (arr)[((gid) *  8) + 3].var, (arr)[((gid) *  8) + 4].var, (arr)[((gid) *  8) + 5].var, (arr)[((gid) *  8) + 6].var, (arr)[((gid) *  8) + 7].var)
 #elif VECT_SIZE == 16
-#define packvf(arr,var,gid) (u32x) ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
+#define packvf(arr,var,gid) make_u32x ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
 #endif
 
 #if   VECT_SIZE == 1
-#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) *  1) + 0].var)
+#define pack64vf(arr,var,gid) make_u64x ((arr)[((gid) *  1) + 0].var) /* pack64vf (one definition per VECT_SIZE): lane k of the u64x result is arr[gid * VECT_SIZE + k].var; unlike pack64v the field is read directly, without an index */
 #elif VECT_SIZE == 2
-#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) *  2) + 0].var, (arr)[((gid) *  2) + 1].var)
+#define pack64vf(arr,var,gid) make_u64x ((arr)[((gid) *  2) + 0].var, (arr)[((gid) *  2) + 1].var)
 #elif VECT_SIZE == 4
-#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) *  4) + 0].var, (arr)[((gid) *  4) + 1].var, (arr)[((gid) *  4) + 2].var, (arr)[((gid) *  4) + 3].var)
+#define pack64vf(arr,var,gid) make_u64x ((arr)[((gid) *  4) + 0].var, (arr)[((gid) *  4) + 1].var, (arr)[((gid) *  4) + 2].var, (arr)[((gid) *  4) + 3].var)
 #elif VECT_SIZE == 8
-#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) *  8) + 0].var, (arr)[((gid) *  8) + 1].var, (arr)[((gid) *  8) + 2].var, (arr)[((gid) *  8) + 3].var, (arr)[((gid) *  8) + 4].var, (arr)[((gid) *  8) + 5].var, (arr)[((gid) *  8) + 6].var, (arr)[((gid) *  8) + 7].var)
+#define pack64vf(arr,var,gid) make_u64x ((arr)[((gid) *  8) + 0].var, (arr)[((gid) *  8) + 1].var, (arr)[((gid) *  8) + 2].var, (arr)[((gid) *  8) + 3].var, (arr)[((gid) *  8) + 4].var, (arr)[((gid) *  8) + 5].var, (arr)[((gid) *  8) + 6].var, (arr)[((gid) *  8) + 7].var)
 #elif VECT_SIZE == 16
-#define pack64vf(arr,var,gid) (u64x) ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
+#define pack64vf(arr,var,gid) make_u64x ((arr)[((gid) * 16) + 0].var, (arr)[((gid) * 16) + 1].var, (arr)[((gid) * 16) + 2].var, (arr)[((gid) * 16) + 3].var, (arr)[((gid) * 16) + 4].var, (arr)[((gid) * 16) + 5].var, (arr)[((gid) * 16) + 6].var, (arr)[((gid) * 16) + 7].var, (arr)[((gid) * 16) + 8].var, (arr)[((gid) * 16) + 9].var, (arr)[((gid) * 16) + 10].var, (arr)[((gid) * 16) + 11].var, (arr)[((gid) * 16) + 12].var, (arr)[((gid) * 16) + 13].var, (arr)[((gid) * 16) + 14].var, (arr)[((gid) * 16) + 15].var)
 #endif
 
 #if   VECT_SIZE == 1
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 7ee661e6e..44c913699 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -51,6 +51,12 @@ typedef u8   u8x;
 typedef u16  u16x;
 typedef u32  u32x;
 typedef u64  u64x;
+
+#define make_u8x  (u8) /* u8x..u64x are typedefs of the scalar u8..u64 in this branch, so make_*x (v) is simply a scalar cast of v */
+#define make_u16x (u16)
+#define make_u32x (u32)
+#define make_u64x (u64)
+
 #else
 #ifdef IS_CUDA
 
@@ -707,11 +713,22 @@ typedef __device_builtin__ struct u16x u16x;
 typedef __device_builtin__ struct u32x u32x;
 typedef __device_builtin__ struct u64x u64x;
 
+#define make_u8x  u8x /* u8x..u64x are struct types in this branch; make_*x (a, b, ...) expands to u8x (a, b, ...) etc. NOTE(review): presumably resolves to a constructor of the struct - confirm against the struct definitions */
+#define make_u16x u16x
+#define make_u32x u32x
+#define make_u64x u64x
+
 #else
 typedef VTYPE(uchar,  VECT_SIZE)  u8x;
 typedef VTYPE(ushort, VECT_SIZE) u16x;
 typedef VTYPE(uint,   VECT_SIZE) u32x;
 typedef VTYPE(ulong,  VECT_SIZE) u64x;
+
+#define make_u8x  (u8x) /* u8x..u64x are VTYPE(..., VECT_SIZE) vector typedefs in this branch; make_*x (a, b, ...) expands to the parenthesized-type form (u8x) (a, b, ...) etc. */
+#define make_u16x (u16x)
+#define make_u32x (u32x)
+#define make_u64x (u64x)
+
 #endif
 #endif
 
diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl
index 9614ba7d9..2b7b234a7 100644
--- a/OpenCL/m01500_a0-pure.cl
+++ b/OpenCL/m01500_a0-pure.cl
@@ -334,13 +334,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) /* BOX (one definition per VECT_SIZE): per-lane table lookup, lane k of the result is S[n][i.sk] */
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_keysetup (u32 c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64])
diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl
index 8f7565fa3..e7ee552b8 100644
--- a/OpenCL/m01500_a1-pure.cl
+++ b/OpenCL/m01500_a1-pure.cl
@@ -332,13 +332,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) /* BOX (one definition per VECT_SIZE): per-lane table lookup, lane k of the result is S[n][i.sk] */
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_keysetup (u32 c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64])
diff --git a/OpenCL/m01750_a0-optimized.cl b/OpenCL/m01750_a0-optimized.cl
index 77115c6cb..de916ff90 100644
--- a/OpenCL/m01750_a0-optimized.cl
+++ b/OpenCL/m01750_a0-optimized.cl
@@ -70,22 +70,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x3636363636363636;
-  w2_t[0] =                             (u64x) 0x3636363636363636;
-  w2_t[1] =                             (u64x) 0x3636363636363636;
-  w2_t[2] =                             (u64x) 0x3636363636363636;
-  w2_t[3] =                             (u64x) 0x3636363636363636;
-  w3_t[0] =                             (u64x) 0x3636363636363636;
-  w3_t[1] =                             (u64x) 0x3636363636363636;
-  w3_t[2] =                             (u64x) 0x3636363636363636;
-  w3_t[3] =                             (u64x) 0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -98,22 +98,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m01750_a1-optimized.cl b/OpenCL/m01750_a1-optimized.cl
index c7bbc3c22..6d3287176 100644
--- a/OpenCL/m01750_a1-optimized.cl
+++ b/OpenCL/m01750_a1-optimized.cl
@@ -68,22 +68,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x3636363636363636;
-  w2_t[0] =                             (u64x) 0x3636363636363636;
-  w2_t[1] =                             (u64x) 0x3636363636363636;
-  w2_t[2] =                             (u64x) 0x3636363636363636;
-  w2_t[3] =                             (u64x) 0x3636363636363636;
-  w3_t[0] =                             (u64x) 0x3636363636363636;
-  w3_t[1] =                             (u64x) 0x3636363636363636;
-  w3_t[2] =                             (u64x) 0x3636363636363636;
-  w3_t[3] =                             (u64x) 0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -96,22 +96,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m01750_a3-optimized.cl b/OpenCL/m01750_a3-optimized.cl
index 29e30c3fb..99301acde 100644
--- a/OpenCL/m01750_a3-optimized.cl
+++ b/OpenCL/m01750_a3-optimized.cl
@@ -68,22 +68,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636;
-  w2_t[0] =                             0x3636363636363636;
-  w2_t[1] =                             0x3636363636363636;
-  w2_t[2] =                             0x3636363636363636;
-  w2_t[3] =                             0x3636363636363636;
-  w3_t[0] =                             0x3636363636363636;
-  w3_t[1] =                             0x3636363636363636;
-  w3_t[2] =                             0x3636363636363636;
-  w3_t[3] =                             0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -96,22 +96,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m01760_a0-optimized.cl b/OpenCL/m01760_a0-optimized.cl
index a88156593..ec036ab8c 100644
--- a/OpenCL/m01760_a0-optimized.cl
+++ b/OpenCL/m01760_a0-optimized.cl
@@ -70,22 +70,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x3636363636363636;
-  w2_t[0] =                             (u64x) 0x3636363636363636;
-  w2_t[1] =                             (u64x) 0x3636363636363636;
-  w2_t[2] =                             (u64x) 0x3636363636363636;
-  w2_t[3] =                             (u64x) 0x3636363636363636;
-  w3_t[0] =                             (u64x) 0x3636363636363636;
-  w3_t[1] =                             (u64x) 0x3636363636363636;
-  w3_t[2] =                             (u64x) 0x3636363636363636;
-  w3_t[3] =                             (u64x) 0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -98,22 +98,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m01760_a1-optimized.cl b/OpenCL/m01760_a1-optimized.cl
index d06f82987..016483936 100644
--- a/OpenCL/m01760_a1-optimized.cl
+++ b/OpenCL/m01760_a1-optimized.cl
@@ -68,22 +68,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x3636363636363636;
-  w2_t[0] =                             (u64x) 0x3636363636363636;
-  w2_t[1] =                             (u64x) 0x3636363636363636;
-  w2_t[2] =                             (u64x) 0x3636363636363636;
-  w2_t[3] =                             (u64x) 0x3636363636363636;
-  w3_t[0] =                             (u64x) 0x3636363636363636;
-  w3_t[1] =                             (u64x) 0x3636363636363636;
-  w3_t[2] =                             (u64x) 0x3636363636363636;
-  w3_t[3] =                             (u64x) 0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -96,22 +96,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m01760_a3-optimized.cl b/OpenCL/m01760_a3-optimized.cl
index 4ce0ea423..3b5a5ca13 100644
--- a/OpenCL/m01760_a3-optimized.cl
+++ b/OpenCL/m01760_a3-optimized.cl
@@ -68,22 +68,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
   u64x w2_t[4];
   u64x w3_t[4];
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x3636363636363636;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x3636363636363636;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x3636363636363636;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x3636363636363636;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x3636363636363636;
-  w2_t[0] =                             (u64x) 0x3636363636363636;
-  w2_t[1] =                             (u64x) 0x3636363636363636;
-  w2_t[2] =                             (u64x) 0x3636363636363636;
-  w2_t[3] =                             (u64x) 0x3636363636363636;
-  w3_t[0] =                             (u64x) 0x3636363636363636;
-  w3_t[1] =                             (u64x) 0x3636363636363636;
-  w3_t[2] =                             (u64x) 0x3636363636363636;
-  w3_t[3] =                             (u64x) 0x3636363636363636;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x3636363636363636);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x3636363636363636);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x3636363636363636);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x3636363636363636);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x3636363636363636);
+  w2_t[0] =                             make_u64x (0x3636363636363636);
+  w2_t[1] =                             make_u64x (0x3636363636363636);
+  w2_t[2] =                             make_u64x (0x3636363636363636);
+  w2_t[3] =                             make_u64x (0x3636363636363636);
+  w3_t[0] =                             make_u64x (0x3636363636363636);
+  w3_t[1] =                             make_u64x (0x3636363636363636);
+  w3_t[2] =                             make_u64x (0x3636363636363636);
+  w3_t[3] =                             make_u64x (0x3636363636363636);
 
   ipad[0] = SHA512M_A;
   ipad[1] = SHA512M_B;
@@ -96,22 +96,22 @@ DECLSPEC void hmac_sha512_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u64x *ipa
 
   sha512_transform_transport_vector (w0_t, w1_t, w2_t, w3_t, ipad);
 
-  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w2_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[0] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[1] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[2] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
-  w3_t[3] =                             (u64x) 0x5c5c5c5c5c5c5c5c;
+  w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w2_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[0] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[1] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[2] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
+  w3_t[3] =                             make_u64x (0x5c5c5c5c5c5c5c5c);
 
   opad[0] = SHA512M_A;
   opad[1] = SHA512M_B;
diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl
index 82e9dcfb4..f0b9b8abb 100644
--- a/OpenCL/m02610_a0-optimized.cl
+++ b/OpenCL/m02610_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02610_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl
index 6f898154a..e8c750166 100644
--- a/OpenCL/m02610_a0-pure.cl
+++ b/OpenCL/m02610_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02610_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl
index c697faa2e..a32220ae6 100644
--- a/OpenCL/m02610_a1-optimized.cl
+++ b/OpenCL/m02610_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl
index 0db5b5a50..8c03417cf 100644
--- a/OpenCL/m02610_a1-pure.cl
+++ b/OpenCL/m02610_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02610_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl
index eea27f0c4..4a5099318 100644
--- a/OpenCL/m02610_a3-optimized.cl
+++ b/OpenCL/m02610_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m02610m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl
index 7aadbe278..29b0d157a 100644
--- a/OpenCL/m02610_a3-pure.cl
+++ b/OpenCL/m02610_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02610_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl
index 23f08eae5..54e63878e 100644
--- a/OpenCL/m02710_a0-optimized.cl
+++ b/OpenCL/m02710_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02710_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl
index 2c1aa24dc..413d5c8e9 100644
--- a/OpenCL/m02710_a1-optimized.cl
+++ b/OpenCL/m02710_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl
index 4de5c2a47..8c3528c5b 100644
--- a/OpenCL/m02710_a3-optimized.cl
+++ b/OpenCL/m02710_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m02710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl
index 7e35b7ac2..577d620d6 100644
--- a/OpenCL/m02810_a0-optimized.cl
+++ b/OpenCL/m02810_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02810_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl
index 16a4bc77f..bec93ce7f 100644
--- a/OpenCL/m02810_a0-pure.cl
+++ b/OpenCL/m02810_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02810_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl
index e62cd80e4..ac48acf95 100644
--- a/OpenCL/m02810_a1-optimized.cl
+++ b/OpenCL/m02810_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl
index 330909f26..b6dbd3f1e 100644
--- a/OpenCL/m02810_a1-pure.cl
+++ b/OpenCL/m02810_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02810_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl
index 25d0c44d0..94df6ea7f 100644
--- a/OpenCL/m02810_a3-optimized.cl
+++ b/OpenCL/m02810_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m02810m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl
index 42fd50213..f6e4185fd 100644
--- a/OpenCL/m02810_a3-pure.cl
+++ b/OpenCL/m02810_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m02810_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl
index 945043f25..f1652e046 100644
--- a/OpenCL/m03000_a0-pure.cl
+++ b/OpenCL/m03000_a0-pure.cl
@@ -337,13 +337,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl
index 2f4d97572..9b7820579 100644
--- a/OpenCL/m03000_a1-pure.cl
+++ b/OpenCL/m03000_a1-pure.cl
@@ -335,13 +335,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl
index 2e1a9316f..74ae66a6e 100644
--- a/OpenCL/m03710_a0-optimized.cl
+++ b/OpenCL/m03710_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03710_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl
index 033e57336..a8712e610 100644
--- a/OpenCL/m03710_a0-pure.cl
+++ b/OpenCL/m03710_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03710_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl
index 9d3f6c23e..633de2cc2 100644
--- a/OpenCL/m03710_a1-optimized.cl
+++ b/OpenCL/m03710_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl
index eb2ed10e8..c77e07919 100644
--- a/OpenCL/m03710_a1-pure.cl
+++ b/OpenCL/m03710_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03710_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl
index f1dbd2d80..27198dba5 100644
--- a/OpenCL/m03710_a3-optimized.cl
+++ b/OpenCL/m03710_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m03710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl
index 4311184ce..153265207 100644
--- a/OpenCL/m03710_a3-pure.cl
+++ b/OpenCL/m03710_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03710_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl
index e3b540658..0b052fdc3 100644
--- a/OpenCL/m03910_a0-optimized.cl
+++ b/OpenCL/m03910_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03910_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl
index ea196e051..ff8474f46 100644
--- a/OpenCL/m03910_a0-pure.cl
+++ b/OpenCL/m03910_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03910_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl
index c28bace04..57abab584 100644
--- a/OpenCL/m03910_a1-optimized.cl
+++ b/OpenCL/m03910_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl
index 7608787ed..245041c93 100644
--- a/OpenCL/m03910_a1-pure.cl
+++ b/OpenCL/m03910_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03910_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl
index d8a42b48c..7c2d11816 100644
--- a/OpenCL/m03910_a3-optimized.cl
+++ b/OpenCL/m03910_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m03910m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl
index 873c5485e..31b13ca35 100644
--- a/OpenCL/m03910_a3-pure.cl
+++ b/OpenCL/m03910_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m03910_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl
index e7b4c6fd0..96909c9b6 100644
--- a/OpenCL/m04010_a0-optimized.cl
+++ b/OpenCL/m04010_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04010_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl
index 6763f5c72..4f81378c5 100644
--- a/OpenCL/m04010_a0-pure.cl
+++ b/OpenCL/m04010_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04010_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl
index 70ee618f5..e7b0eae16 100644
--- a/OpenCL/m04010_a1-optimized.cl
+++ b/OpenCL/m04010_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl
index 856ec75ec..60eac950f 100644
--- a/OpenCL/m04010_a1-pure.cl
+++ b/OpenCL/m04010_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04010_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl
index 8e3297cf7..fab737d45 100644
--- a/OpenCL/m04010_a3-optimized.cl
+++ b/OpenCL/m04010_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04010m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl
index b3b855041..bc5fddc8d 100644
--- a/OpenCL/m04010_a3-pure.cl
+++ b/OpenCL/m04010_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04010_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl
index 1b6e55088..9b32de16c 100644
--- a/OpenCL/m04110_a0-optimized.cl
+++ b/OpenCL/m04110_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04110_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl
index c8e7ce93f..12719e888 100644
--- a/OpenCL/m04110_a0-pure.cl
+++ b/OpenCL/m04110_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04110_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl
index 16c65cef1..be22bd1ea 100644
--- a/OpenCL/m04110_a1-optimized.cl
+++ b/OpenCL/m04110_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl
index 011852191..2a2322a71 100644
--- a/OpenCL/m04110_a1-pure.cl
+++ b/OpenCL/m04110_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04110_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl
index 0ca3c7898..1dcd04b03 100644
--- a/OpenCL/m04110_a3-optimized.cl
+++ b/OpenCL/m04110_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04110m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl
index 1e33b0a62..490ecd667 100644
--- a/OpenCL/m04110_a3-pure.cl
+++ b/OpenCL/m04110_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04110_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl
index f29f76f6a..b8f7c10e7 100644
--- a/OpenCL/m04310_a0-optimized.cl
+++ b/OpenCL/m04310_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04310_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl
index 422a6a5e7..67496d227 100644
--- a/OpenCL/m04310_a0-pure.cl
+++ b/OpenCL/m04310_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04310_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl
index 78448cf61..a78a21694 100644
--- a/OpenCL/m04310_a1-optimized.cl
+++ b/OpenCL/m04310_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl
index 68b097fdd..6b219b473 100644
--- a/OpenCL/m04310_a1-pure.cl
+++ b/OpenCL/m04310_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04310_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl
index 53ed7a4cb..4a8572373 100644
--- a/OpenCL/m04310_a3-optimized.cl
+++ b/OpenCL/m04310_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04310m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl
index f5e5fa707..cd83bb46a 100644
--- a/OpenCL/m04310_a3-pure.cl
+++ b/OpenCL/m04310_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04310_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl
index 4c8d31c33..b192a5859 100644
--- a/OpenCL/m04400_a0-optimized.cl
+++ b/OpenCL/m04400_a0-optimized.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04400_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04400_a0-pure.cl b/OpenCL/m04400_a0-pure.cl
index 68e8a657c..35ca3e386 100644
--- a/OpenCL/m04400_a0-pure.cl
+++ b/OpenCL/m04400_a0-pure.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04400_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl
index 19b432aad..cbf0cfdf5 100644
--- a/OpenCL/m04400_a1-optimized.cl
+++ b/OpenCL/m04400_a1-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl
index 4968aa90e..7247f7b14 100644
--- a/OpenCL/m04400_a1-pure.cl
+++ b/OpenCL/m04400_a1-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04400_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl
index 153f13caf..72aea5c55 100644
--- a/OpenCL/m04400_a3-optimized.cl
+++ b/OpenCL/m04400_a3-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl
index 60c8eb745..e3eb903f6 100644
--- a/OpenCL/m04400_a3-pure.cl
+++ b/OpenCL/m04400_a3-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04400_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl
index c8cd30328..ba71bbcb8 100644
--- a/OpenCL/m04500_a0-optimized.cl
+++ b/OpenCL/m04500_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04500_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl
index dcea1b78a..b3f207eaa 100644
--- a/OpenCL/m04500_a0-pure.cl
+++ b/OpenCL/m04500_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04500_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl
index 05010648b..6e58bb67a 100644
--- a/OpenCL/m04500_a1-optimized.cl
+++ b/OpenCL/m04500_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl
index c480f3ecf..32b79fb03 100644
--- a/OpenCL/m04500_a1-pure.cl
+++ b/OpenCL/m04500_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04500_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl
index 2809cd1ae..0c79696db 100644
--- a/OpenCL/m04500_a3-optimized.cl
+++ b/OpenCL/m04500_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl
index b258f97c3..735f512c7 100644
--- a/OpenCL/m04500_a3-pure.cl
+++ b/OpenCL/m04500_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04500_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl
index 73a119f5d..ffea45f02 100644
--- a/OpenCL/m04520_a0-optimized.cl
+++ b/OpenCL/m04520_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04520_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl
index 3e7e3034c..1c3aa88ea 100644
--- a/OpenCL/m04520_a0-pure.cl
+++ b/OpenCL/m04520_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04520_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl
index 8bd6e12ab..5758a6bd8 100644
--- a/OpenCL/m04520_a1-optimized.cl
+++ b/OpenCL/m04520_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl
index 3556b9cc6..1ef295b06 100644
--- a/OpenCL/m04520_a1-pure.cl
+++ b/OpenCL/m04520_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04520_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl
index f97e9dcb4..0a40509bb 100644
--- a/OpenCL/m04520_a3-optimized.cl
+++ b/OpenCL/m04520_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04520m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl
index c1f4aa845..8f4564d77 100644
--- a/OpenCL/m04520_a3-pure.cl
+++ b/OpenCL/m04520_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04520_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl
index 4b7bbba9f..e6d1a2468 100644
--- a/OpenCL/m04700_a0-optimized.cl
+++ b/OpenCL/m04700_a0-optimized.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04700_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl
index 601675b55..9c586f214 100644
--- a/OpenCL/m04700_a0-pure.cl
+++ b/OpenCL/m04700_a0-pure.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04700_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl
index 7b5e61c03..f3989d858 100644
--- a/OpenCL/m04700_a1-optimized.cl
+++ b/OpenCL/m04700_a1-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl
index 9fb80692a..93ae560bf 100644
--- a/OpenCL/m04700_a1-pure.cl
+++ b/OpenCL/m04700_a1-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04700_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl
index 3d431931a..4b22f8887 100644
--- a/OpenCL/m04700_a3-optimized.cl
+++ b/OpenCL/m04700_a3-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m04700m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl
index 633569c2b..9e6a668ef 100644
--- a/OpenCL/m04700_a3-pure.cl
+++ b/OpenCL/m04700_a3-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04700_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m04800_a1-optimized.cl b/OpenCL/m04800_a1-optimized.cl
index 3384bcc4e..4e2e0fe07 100644
--- a/OpenCL/m04800_a1-optimized.cl
+++ b/OpenCL/m04800_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m04800_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl
index 2b1866fc3..6a5c8451c 100644
--- a/OpenCL/m05500_a0-optimized.cl
+++ b/OpenCL/m05500_a0-optimized.cl
@@ -347,13 +347,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl
index db786716b..7681eea15 100644
--- a/OpenCL/m05500_a0-pure.cl
+++ b/OpenCL/m05500_a0-pure.cl
@@ -347,13 +347,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl
index 48c752d9c..d4ae2526a 100644
--- a/OpenCL/m05500_a1-optimized.cl
+++ b/OpenCL/m05500_a1-optimized.cl
@@ -345,13 +345,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl
index 80e3b431a..28b5a627c 100644
--- a/OpenCL/m05500_a1-pure.cl
+++ b/OpenCL/m05500_a1-pure.cl
@@ -345,13 +345,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl
index aecbd8664..90a3c3b14 100644
--- a/OpenCL/m05500_a3-optimized.cl
+++ b/OpenCL/m05500_a3-optimized.cl
@@ -345,13 +345,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl
index 298317f33..25e6392fb 100644
--- a/OpenCL/m05500_a3-pure.cl
+++ b/OpenCL/m05500_a3-pure.cl
@@ -345,13 +345,13 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl
index e199d70df..5d42eb2ae 100644
--- a/OpenCL/m06900_a0-optimized.cl
+++ b/OpenCL/m06900_a0-optimized.cl
@@ -286,13 +286,13 @@ CONSTANT_VK u32a c_tables[4][256] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #define _round(k1,k2,tbl)                 \
diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl
index bdf7943bc..60fed6359 100644
--- a/OpenCL/m06900_a1-optimized.cl
+++ b/OpenCL/m06900_a1-optimized.cl
@@ -284,13 +284,13 @@ CONSTANT_VK u32a c_tables[4][256] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #define _round(k1,k2,tbl)                 \
diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl
index 5becbdca7..a0f576783 100644
--- a/OpenCL/m06900_a3-optimized.cl
+++ b/OpenCL/m06900_a3-optimized.cl
@@ -284,13 +284,13 @@ CONSTANT_VK u32a c_tables[4][256] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #define _round(k1,k2,tbl)                 \
diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl
index 80dce4336..ea97818be 100644
--- a/OpenCL/m08400_a0-optimized.cl
+++ b/OpenCL/m08400_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m08400_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl
index 21a74fdfd..b88e9aa8a 100644
--- a/OpenCL/m08400_a0-pure.cl
+++ b/OpenCL/m08400_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m08400_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl
index 7bb51a077..5d1ef435c 100644
--- a/OpenCL/m08400_a1-optimized.cl
+++ b/OpenCL/m08400_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl
index f78b4c31a..b799d8faf 100644
--- a/OpenCL/m08400_a1-pure.cl
+++ b/OpenCL/m08400_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m08400_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl
index d981512bd..96afc5f8a 100644
--- a/OpenCL/m08400_a3-optimized.cl
+++ b/OpenCL/m08400_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m08400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl
index e0b3463b6..0e224662d 100644
--- a/OpenCL/m08400_a3-pure.cl
+++ b/OpenCL/m08400_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m08400_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl
index 5e0a0d044..03f57c0ad 100644
--- a/OpenCL/m08500_a0-pure.cl
+++ b/OpenCL/m08500_a0-pure.cl
@@ -374,25 +374,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl
index 511c8da0e..2ca27bd1f 100644
--- a/OpenCL/m08500_a1-pure.cl
+++ b/OpenCL/m08500_a1-pure.cl
@@ -372,25 +372,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl
index e59d35470..36f27f2fd 100644
--- a/OpenCL/m08500_a3-pure.cl
+++ b/OpenCL/m08500_a3-pure.cl
@@ -372,25 +372,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl
index 0e6014846..486362174 100644
--- a/OpenCL/m08600_a0-pure.cl
+++ b/OpenCL/m08600_a0-pure.cl
@@ -55,13 +55,13 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32 *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl
index 694e72ad6..1260a50cf 100644
--- a/OpenCL/m08600_a1-pure.cl
+++ b/OpenCL/m08600_a1-pure.cl
@@ -53,13 +53,13 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32 *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl
index deff07c41..fc58fcc34 100644
--- a/OpenCL/m08600_a3-pure.cl
+++ b/OpenCL/m08600_a3-pure.cl
@@ -52,13 +52,13 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32x *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl
index 7b63731ac..2922cfb39 100644
--- a/OpenCL/m08700_a0-optimized.cl
+++ b/OpenCL/m08700_a0-optimized.cl
@@ -53,27 +53,27 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 };
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32x *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl
index 240669d0c..02d3873c3 100644
--- a/OpenCL/m08700_a1-optimized.cl
+++ b/OpenCL/m08700_a1-optimized.cl
@@ -51,27 +51,27 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 };
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32x *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl
index 79121b96c..f15b3722d 100644
--- a/OpenCL/m08700_a3-optimized.cl
+++ b/OpenCL/m08700_a3-optimized.cl
@@ -52,27 +52,27 @@ CONSTANT_VK u32a lotus_magic_table[256] =
 #define BOX(S,i) (S)[(i)]
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(S,i) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(S,i) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void lotus_mix (u32x *in, LOCAL_AS u32 *s_lotus_magic_table)
diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl
index 2f77f1366..c0e12f986 100644
--- a/OpenCL/m11100_a0-optimized.cl
+++ b/OpenCL/m11100_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11100_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl
index 5896ea564..c01fe3983 100644
--- a/OpenCL/m11100_a0-pure.cl
+++ b/OpenCL/m11100_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11100_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl
index 1f003b012..1ee64d6d7 100644
--- a/OpenCL/m11100_a1-optimized.cl
+++ b/OpenCL/m11100_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl
index 08c85eea5..29b085050 100644
--- a/OpenCL/m11100_a1-pure.cl
+++ b/OpenCL/m11100_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11100_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl
index 6e1abef0c..58a49c9aa 100644
--- a/OpenCL/m11100_a3-optimized.cl
+++ b/OpenCL/m11100_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m11100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl
index 68b100050..de5089ebc 100644
--- a/OpenCL/m11100_a3-pure.cl
+++ b/OpenCL/m11100_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11100_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m11200_a3-pure.cl b/OpenCL/m11200_a3-pure.cl
index 4f63a5f52..76bb6a866 100644
--- a/OpenCL/m11200_a3-pure.cl
+++ b/OpenCL/m11200_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11200_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl
index d31492c77..ef1074fe1 100644
--- a/OpenCL/m11400_a0-pure.cl
+++ b/OpenCL/m11400_a0-pure.cl
@@ -27,15 +27,15 @@ typedef struct sip
 } sip_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11400_mxx (KERN_ATTR_RULES_ESALT (sip_t))
diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl
index f691a8f11..85cdc5ee6 100644
--- a/OpenCL/m11400_a1-pure.cl
+++ b/OpenCL/m11400_a1-pure.cl
@@ -25,15 +25,15 @@ typedef struct sip
 } sip_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11400_mxx (KERN_ATTR_ESALT (sip_t))
diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl
index 595eb30ec..b933c43d9 100644
--- a/OpenCL/m11400_a3-pure.cl
+++ b/OpenCL/m11400_a3-pure.cl
@@ -25,15 +25,15 @@ typedef struct sip
 } sip_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m11400_mxx (KERN_ATTR_VECTOR_ESALT (sip_t))
diff --git a/OpenCL/m11500_a0-optimized.cl b/OpenCL/m11500_a0-optimized.cl
index 9a77848e9..1ea2ec7e4 100644
--- a/OpenCL/m11500_a0-optimized.cl
+++ b/OpenCL/m11500_a0-optimized.cl
@@ -91,15 +91,15 @@ DECLSPEC u32x round_crc32 (u32x a, const u32x v)
   const u32x s = a >> 8;
 
   #if   VECT_SIZE == 1
-  a = (u32x) crc32tab[k];
+  a = make_u32x (crc32tab[k]);
   #elif VECT_SIZE == 2
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1]);
   #elif VECT_SIZE == 4
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
   #elif VECT_SIZE == 8
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
   #elif VECT_SIZE == 16
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
   #endif
 
   a ^= s;
diff --git a/OpenCL/m11500_a1-optimized.cl b/OpenCL/m11500_a1-optimized.cl
index 06a8b1ebb..b3390f961 100644
--- a/OpenCL/m11500_a1-optimized.cl
+++ b/OpenCL/m11500_a1-optimized.cl
@@ -89,15 +89,15 @@ DECLSPEC u32x round_crc32 (u32x a, const u32x v)
   const u32x s = a >> 8;
 
   #if   VECT_SIZE == 1
-  a = (u32x) crc32tab[k];
+  a = make_u32x (crc32tab[k]); // call form, same as the vector branches below
   #elif VECT_SIZE == 2
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1]);
   #elif VECT_SIZE == 4
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
   #elif VECT_SIZE == 8
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
   #elif VECT_SIZE == 16
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
   #endif
 
   a ^= s;
diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl
index 698b93325..d10b09401 100644
--- a/OpenCL/m11500_a3-optimized.cl
+++ b/OpenCL/m11500_a3-optimized.cl
@@ -89,15 +89,15 @@ DECLSPEC u32x round_crc32 (u32x a, const u32x v)
   const u32x s = a >> 8;
 
   #if   VECT_SIZE == 1
-  a = (u32x) crc32tab[k];
+  a = make_u32x (crc32tab[k]); // call form, same as the vector branches below
   #elif VECT_SIZE == 2
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1]);
   #elif VECT_SIZE == 4
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3]);
   #elif VECT_SIZE == 8
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7]);
   #elif VECT_SIZE == 16
-  a = (u32x) (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
+  a = make_u32x (crc32tab[k.s0], crc32tab[k.s1], crc32tab[k.s2], crc32tab[k.s3], crc32tab[k.s4], crc32tab[k.s5], crc32tab[k.s6], crc32tab[k.s7], crc32tab[k.s8], crc32tab[k.s9], crc32tab[k.sa], crc32tab[k.sb], crc32tab[k.sc], crc32tab[k.sd], crc32tab[k.se], crc32tab[k.sf]);
   #endif
 
   a ^= s;
diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl
index a723b3f1d..afc21b471 100644
--- a/OpenCL/m12600_a0-optimized.cl
+++ b/OpenCL/m12600_a0-optimized.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m12600_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl
index 6519cc9d1..7222b668b 100644
--- a/OpenCL/m12600_a0-pure.cl
+++ b/OpenCL/m12600_a0-pure.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m12600_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl
index 76d5f1537..fe78a19cc 100644
--- a/OpenCL/m12600_a1-optimized.cl
+++ b/OpenCL/m12600_a1-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl
index 420f8491f..c861a06df 100644
--- a/OpenCL/m12600_a1-pure.cl
+++ b/OpenCL/m12600_a1-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m12600_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl
index 65d99f23d..88310a835 100644
--- a/OpenCL/m12600_a3-optimized.cl
+++ b/OpenCL/m12600_a3-optimized.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_upper8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m12600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl
index 23f23023f..880a9adc4 100644
--- a/OpenCL/m12600_a3-pure.cl
+++ b/OpenCL/m12600_a3-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m12600_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl
index 35c823240..53e265ae8 100644
--- a/OpenCL/m13900_a0-optimized.cl
+++ b/OpenCL/m13900_a0-optimized.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m13900_m04 (KERN_ATTR_RULES ())
diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl
index 86e2825d9..ecc8d6db3 100644
--- a/OpenCL/m13900_a0-pure.cl
+++ b/OpenCL/m13900_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m13900_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl
index f6eed8e47..514c95913 100644
--- a/OpenCL/m13900_a1-optimized.cl
+++ b/OpenCL/m13900_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl
index 1f97e165b..7d34187ea 100644
--- a/OpenCL/m13900_a1-pure.cl
+++ b/OpenCL/m13900_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m13900_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl
index a28ac46e5..21cf1e92a 100644
--- a/OpenCL/m13900_a3-optimized.cl
+++ b/OpenCL/m13900_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void m13900m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl
index a109bb039..a0f8a5eb1 100644
--- a/OpenCL/m13900_a3-pure.cl
+++ b/OpenCL/m13900_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m13900_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl
index 86611ffa8..b6fa198cc 100644
--- a/OpenCL/m14000_a0-pure.cl
+++ b/OpenCL/m14000_a0-pure.cl
@@ -360,25 +360,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl
index ee2f6f49f..4845c7f47 100644
--- a/OpenCL/m14000_a1-pure.cl
+++ b/OpenCL/m14000_a1-pure.cl
@@ -350,25 +350,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl
index eeb86866e..780b7fae2 100644
--- a/OpenCL/m14100_a0-pure.cl
+++ b/OpenCL/m14100_a0-pure.cl
@@ -360,25 +360,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl
index a96243e94..923e3bf61 100644
--- a/OpenCL/m14100_a1-pure.cl
+++ b/OpenCL/m14100_a1-pure.cl
@@ -350,25 +350,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl
index 980de5f19..4b46cf5e8 100644
--- a/OpenCL/m14100_a3-pure.cl
+++ b/OpenCL/m14100_a3-pure.cl
@@ -350,25 +350,25 @@ CONSTANT_VK u32a c_skb[8][64] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 #if   VECT_SIZE == 1
 #define BOX1(i,S) (S)[(i)]
 #elif VECT_SIZE == 2
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1])
 #elif VECT_SIZE == 4
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3])
 #elif VECT_SIZE == 8
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7])
 #elif VECT_SIZE == 16
-#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
+#define BOX1(i,S) make_u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_SPtrans)[64])
diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl
index 9d51ac908..3f976a056 100644
--- a/OpenCL/m14400_a0-optimized.cl
+++ b/OpenCL/m14400_a0-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void append_4 (const u32 offset, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 src_r0)
diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl
index 178eac247..f57dbe814 100644
--- a/OpenCL/m14400_a0-pure.cl
+++ b/OpenCL/m14400_a0-pure.cl
@@ -17,15 +17,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m14400_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl
index c2702dd1e..3bbabbe4b 100644
--- a/OpenCL/m14400_a1-optimized.cl
+++ b/OpenCL/m14400_a1-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void append_4 (const u32 offset, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 src_r0)
diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl
index 1a6dd7c89..77d5c7e8a 100644
--- a/OpenCL/m14400_a1-pure.cl
+++ b/OpenCL/m14400_a1-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m14400_mxx (KERN_ATTR_BASIC ())
diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl
index cb3d80b0e..e30be7856 100644
--- a/OpenCL/m14400_a3-optimized.cl
+++ b/OpenCL/m14400_a3-optimized.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 DECLSPEC void append_4 (const u32 offset, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 src_r0)
diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl
index eaea0495d..324cf988e 100644
--- a/OpenCL/m14400_a3-pure.cl
+++ b/OpenCL/m14400_a3-pure.cl
@@ -15,15 +15,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m14400_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl
index a7f968a00..7f55f09b6 100644
--- a/OpenCL/m16000_a0-pure.cl
+++ b/OpenCL/m16000_a0-pure.cl
@@ -346,13 +346,13 @@ CONSTANT_VK u32a c_tripcode_salt[128] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_keysetup (u32 c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64])
diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl
index 653574dc6..8c9900a0a 100644
--- a/OpenCL/m16000_a1-pure.cl
+++ b/OpenCL/m16000_a1-pure.cl
@@ -344,13 +344,13 @@ CONSTANT_VK u32a c_tripcode_salt[128] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_keysetup (u32 c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64])
diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl
index d847db740..11d655e98 100644
--- a/OpenCL/m16000_a3-pure.cl
+++ b/OpenCL/m16000_a3-pure.cl
@@ -344,13 +344,13 @@ CONSTANT_VK u32a c_tripcode_salt[128] =
 #if   VECT_SIZE == 1
 #define BOX(i,n,S) (S)[(n)][(i)]
 #elif VECT_SIZE == 2
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
 #elif VECT_SIZE == 4
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
 #elif VECT_SIZE == 8
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
 #elif VECT_SIZE == 16
-#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
+#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
 #endif
 
 DECLSPEC void _des_crypt_keysetup (u32 c, u32x d, u32x *Kc, u32x *Kd, LOCAL_AS u32 (*s_skb)[64])
diff --git a/OpenCL/m18500_a0-pure.cl b/OpenCL/m18500_a0-pure.cl
index 922ba1c01..857d380ee 100644
--- a/OpenCL/m18500_a0-pure.cl
+++ b/OpenCL/m18500_a0-pure.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m18500_a1-pure.cl b/OpenCL/m18500_a1-pure.cl
index 3047a0129..4de2e20cb 100644
--- a/OpenCL/m18500_a1-pure.cl
+++ b/OpenCL/m18500_a1-pure.cl
@@ -18,15 +18,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m18500_mxx (KERN_ATTR_RULES ())
diff --git a/OpenCL/m18500_a3-pure.cl b/OpenCL/m18500_a3-pure.cl
index 5ef993ac0..8eb1dac9d 100644
--- a/OpenCL/m18500_a3-pure.cl
+++ b/OpenCL/m18500_a3-pure.cl
@@ -16,15 +16,15 @@
 #endif
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m18500_mxx (KERN_ATTR_VECTOR ())
diff --git a/OpenCL/m19500_a0-pure.cl b/OpenCL/m19500_a0-pure.cl
index c9b4cbfad..5ebfaea0c 100644
--- a/OpenCL/m19500_a0-pure.cl
+++ b/OpenCL/m19500_a0-pure.cl
@@ -27,15 +27,15 @@ typedef struct devise_hash
 } devise_hash_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m19500_mxx (KERN_ATTR_RULES_ESALT (devise_hash_t))
diff --git a/OpenCL/m19500_a1-pure.cl b/OpenCL/m19500_a1-pure.cl
index a76d95d70..464f534f4 100644
--- a/OpenCL/m19500_a1-pure.cl
+++ b/OpenCL/m19500_a1-pure.cl
@@ -25,15 +25,15 @@ typedef struct devise_hash
 } devise_hash_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m19500_mxx (KERN_ATTR_ESALT (devise_hash_t))
diff --git a/OpenCL/m19500_a3-pure.cl b/OpenCL/m19500_a3-pure.cl
index 50dce1e7d..a8f0fa8cd 100644
--- a/OpenCL/m19500_a3-pure.cl
+++ b/OpenCL/m19500_a3-pure.cl
@@ -25,15 +25,15 @@ typedef struct devise_hash
 } devise_hash_t;
 
 #if   VECT_SIZE == 1
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i)])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
 #elif VECT_SIZE == 2
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
 #elif VECT_SIZE == 4
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
 #elif VECT_SIZE == 8
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
 #elif VECT_SIZE == 16
-#define uint_to_hex_lower8_le(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
 KERNEL_FQ void m19500_mxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t))

From 027af75a396be3d0ade6440a3dbb51b2c1ce1158 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 8 May 2019 20:42:46 +0200
Subject: [PATCH 41/73] Fix rotate function names

---
 OpenCL/inc_platform.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index 1055838c0..30069cda5 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -14,14 +14,14 @@ DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
 DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
 
-DECLSPEC u32x hc_rotl32   (const u32x a, const int n);
-DECLSPEC u32x hc_rotr32   (const u32x a, const int n);
-DECLSPEC u32  hc_rotl32_S (const u32  a, const int n);
-DECLSPEC u32  hc_rotr32_S (const u32  a, const int n);
-DECLSPEC u64x hc_rotl64   (const u64x a, const int n);
-DECLSPEC u64x hc_rotr64   (const u64x a, const int n);
-DECLSPEC u64  hc_rotl64_S (const u64  a, const int n);
-DECLSPEC u64  hc_rotr64_S (const u64  a, const int n);
+DECLSPEC u32x rotl32   (const u32x a, const int n);
+DECLSPEC u32x rotr32   (const u32x a, const int n);
+DECLSPEC u32  rotl32_S (const u32  a, const int n);
+DECLSPEC u32  rotr32_S (const u32  a, const int n);
+DECLSPEC u64x rotl64   (const u64x a, const int n);
+DECLSPEC u64x rotr64   (const u64x a, const int n);
+DECLSPEC u64  rotl64_S (const u64  a, const int n);
+DECLSPEC u64  rotr64_S (const u64  a, const int n);
 
 //#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
 #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))

From 3a3df091c78eed28e8294b0b9bc99484e6466933 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 8 May 2019 22:42:52 +0200
Subject: [PATCH 42/73] Fix CUDA num_elements

---
 src/backend.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index a1d75c8a6..c5ea4bf1f 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2883,7 +2883,7 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
   const u64 kernel_threads = device_param->kernel_wgs_atinit;
 
-  num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+  num_elements = CEILDIV (num_elements, kernel_threads);
 
   CUfunction function = device_param->cuda_function_atinit;
 
@@ -2913,7 +2913,7 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
     u64 num_elements = num16d;
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
     CUfunction function = device_param->cuda_function_memset;
 
@@ -3111,12 +3111,14 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       case KERN_RUN_AUX4:   local_mem_size  = device_param->kernel_local_mem_size_aux4;   break;
     }
 
+    /*
     if (local_mem_size)
     {
       const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size;
 
       kernel_threads = MIN (kernel_threads, max_threads_possible);
     }
+    */
 
     CUfunction cuda_function = NULL;
 
@@ -3139,7 +3141,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
     if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
     {
@@ -3147,7 +3149,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
       if (rc_cuEventRecord1 == -1) return -1;
 
-      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / 32, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+      const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
 
       if (rc_cuLaunchKernel == -1) return -1;
 
@@ -3179,8 +3181,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
         }
       }
 
-      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
-
       const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
 
       if (rc_cuEventRecord1 == -1) return -1;
@@ -3472,7 +3472,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
                           break;
     }
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
     const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, cuda_args, NULL);
 
@@ -3597,7 +3597,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   if (device_param->is_cuda == true)
   {
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
     CUfunction cuda_function = device_param->cuda_function_amp;
 
@@ -3651,7 +3651,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   if (device_param->is_cuda == true)
   {
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    num_elements = CEILDIV (num_elements, kernel_threads);
 
     CUfunction cuda_function = device_param->cuda_function_decompress;
 

From fb82bfc169752c6dfc6cd20d6d97f649cf7c2992 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 8 May 2019 23:30:07 +0200
Subject: [PATCH 43/73] Improve thread handling based on FIXED_LOCAL_SIZE

---
 src/backend.c              | 40 ++++++++++++--------------------------
 src/modules/module_03200.c |  7 -------
 src/modules/module_09000.c | 37 +++++++++++++++++++++++++++--------
 src/modules/module_18600.c | 37 +++++++++++++++++++++++++++--------
 4 files changed, 70 insertions(+), 51 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index c5ea4bf1f..ed88b191f 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3093,33 +3093,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
   if (device_param->is_cuda == true)
   {
-    u64 local_mem_size = 0;
-
-    switch (kern_run)
-    {
-      case KERN_RUN_1:      local_mem_size  = device_param->kernel_local_mem_size1;       break;
-      case KERN_RUN_12:     local_mem_size  = device_param->kernel_local_mem_size12;      break;
-      case KERN_RUN_2:      local_mem_size  = device_param->kernel_local_mem_size2;       break;
-      case KERN_RUN_23:     local_mem_size  = device_param->kernel_local_mem_size23;      break;
-      case KERN_RUN_3:      local_mem_size  = device_param->kernel_local_mem_size3;       break;
-      case KERN_RUN_4:      local_mem_size  = device_param->kernel_local_mem_size4;       break;
-      case KERN_RUN_INIT2:  local_mem_size  = device_param->kernel_local_mem_size_init2;  break;
-      case KERN_RUN_LOOP2:  local_mem_size  = device_param->kernel_local_mem_size_loop2;  break;
-      case KERN_RUN_AUX1:   local_mem_size  = device_param->kernel_local_mem_size_aux1;   break;
-      case KERN_RUN_AUX2:   local_mem_size  = device_param->kernel_local_mem_size_aux2;   break;
-      case KERN_RUN_AUX3:   local_mem_size  = device_param->kernel_local_mem_size_aux3;   break;
-      case KERN_RUN_AUX4:   local_mem_size  = device_param->kernel_local_mem_size_aux4;   break;
-    }
-
-    /*
-    if (local_mem_size)
-    {
-      const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size;
-
-      kernel_threads = MIN (kernel_threads, max_threads_possible);
-    }
-    */
-
     CUfunction cuda_function = NULL;
 
     if (device_param->is_cuda == true)
@@ -7039,7 +7012,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    // there's not thread column in tuning db, stick to commandline if defined
+    // there's no thread column in tuning db, stick to commandline if defined
 
     if (user_options->kernel_threads_chgd == true)
     {
@@ -7291,6 +7264,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (jit_build_options != NULL)
       {
         build_options_module_len += snprintf (build_options_module_buf + build_options_module_len, build_options_sz - build_options_module_len, "%s", jit_build_options);
+
+        // this is a bit ugly
+        // would be better to have the module return the value as value
+
+        u32 fixed_local_size = 0;
+
+        if (sscanf (jit_build_options, "-D FIXED_LOCAL_SIZE=%u", &fixed_local_size) == 1)
+        {
+          device_param->kernel_threads_min = fixed_local_size;
+          device_param->kernel_threads_max = fixed_local_size;
+        }
       }
     }
 
diff --git a/src/modules/module_03200.c b/src/modules/module_03200.c
index 6cd15c7c7..b0b35b627 100644
--- a/src/modules/module_03200.c
+++ b/src/modules/module_03200.c
@@ -108,13 +108,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
       {
         overhead = 4;
       }
-
-      // no clue yet where this is coming from
-
-      if (device_param->is_cuda == true)
-      {
-        overhead = 240;
-      }
     }
 
     if (user_options->kernel_threads_chgd == true)
diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c
index 8817fd4b6..e8cdac075 100644
--- a/src/modules/module_09000.c
+++ b/src/modules/module_09000.c
@@ -74,6 +74,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
+  // this uses some nice feedback effect.
+  // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
+  // which is then read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
+  // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
+
   u32 fixed_local_size = 0;
 
   if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
@@ -82,19 +87,35 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else
   {
-    if (user_options->kernel_threads_chgd == true)
-    {
-      fixed_local_size = user_options->kernel_threads;
-    }
-    else
-    {
-      u32 overhead = 0;
+    u32 overhead = 0;
 
-      if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
+      // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
+      // on my development system. no clue where the 4 bytes are spent.
+      // I did some research on this and it seems to be related with the datatype.
+      // For example, if i used u8 instead, there's only 1 byte wasted.
+
+      if (device_param->is_opencl == true)
       {
         overhead = 4;
       }
+    }
 
+    if (user_options->kernel_threads_chgd == true)
+    {
+      fixed_local_size = user_options->kernel_threads;
+
+      // otherwise out-of-bound reads
+
+      if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+      {
+        fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+      }
+    }
+    else
+    {
       fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
     }
   }
diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c
index 109a3f65c..663717538 100644
--- a/src/modules/module_18600.c
+++ b/src/modules/module_18600.c
@@ -66,6 +66,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 {
   char *jit_build_options = NULL;
 
+  // this uses some nice feedback effect.
+  // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
+  // which is then read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
+  // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
+
   u32 fixed_local_size = 0;
 
   if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
@@ -74,19 +79,35 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   }
   else
   {
-    if (user_options->kernel_threads_chgd == true)
-    {
-      fixed_local_size = user_options->kernel_threads;
-    }
-    else
-    {
-      u32 overhead = 0;
+    u32 overhead = 0;
 
-      if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
+    {
+      // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
+      // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
+      // on my development system. no clue where the 4 bytes are spent.
+      // I did some research on this and it seems to be related with the datatype.
+      // For example, if i used u8 instead, there's only 1 byte wasted.
+
+      if (device_param->is_opencl == true)
       {
         overhead = 4;
       }
+    }
 
+    if (user_options->kernel_threads_chgd == true)
+    {
+      fixed_local_size = user_options->kernel_threads;
+
+      // otherwise out-of-bound reads
+
+      if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead))
+      {
+        fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
+      }
+    }
+    else
+    {
       fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
     }
   }

From 33028314f00a1021785333daf94f18b0a94f7213 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 00:04:05 +0200
Subject: [PATCH 44/73] Add hc_cuCtxSetCacheConfig()

---
 include/backend.h  |  2 ++
 include/ext_cuda.h |  1 +
 src/backend.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/include/backend.h b/include/backend.h
index 057edb87f..e2ea51fb7 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -42,6 +42,7 @@ int hc_nvrtcGetPTX               (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog,
 int hc_cuCtxCreate               (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
 int hc_cuCtxDestroy              (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
 int hc_cuCtxSetCurrent           (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
+int hc_cuCtxSetCacheConfig       (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config);
 int hc_cuCtxSynchronize          (hashcat_ctx_t *hashcat_ctx);
 int hc_cuDeviceGetAttribute      (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
 int hc_cuDeviceGetCount          (hashcat_ctx_t *hashcat_ctx, int *count);
@@ -56,6 +57,7 @@ int hc_cuEventQuery              (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
 int hc_cuEventRecord             (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream);
 int hc_cuEventSynchronize        (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
 int hc_cuFuncGetAttribute        (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
+int hc_cuFuncSetAttribute        (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value);
 int hc_cuInit                    (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
 int hc_cuLaunchKernel            (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
 int hc_cuMemAlloc                (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
index eb8967f09..49257acbb 100644
--- a/include/ext_cuda.h
+++ b/include/ext_cuda.h
@@ -1024,6 +1024,7 @@ typedef struct hc_cuda_lib
   CUDA_CUCTXGETSHAREDMEMCONFIG  cuCtxGetSharedMemConfig;
   CUDA_CUCTXPOPCURRENT          cuCtxPopCurrent;
   CUDA_CUCTXPUSHCURRENT         cuCtxPushCurrent;
+  CUDA_CUCTXSETCACHECONFIG      cuCtxSetCacheConfig;
   CUDA_CUCTXSETCURRENT          cuCtxSetCurrent;
   CUDA_CUCTXSETSHAREDMEMCONFIG  cuCtxSetSharedMemConfig;
   CUDA_CUCTXSYNCHRONIZE         cuCtxSynchronize;
diff --git a/src/backend.c b/src/backend.c
index ed88b191f..97c9d6b6e 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -886,6 +886,7 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
   HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
+  HC_LOAD_FUNC (cuda, cuCtxSetCacheConfig,      CUDA_CUCTXSETCACHECONFIG,       CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
   HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
@@ -1467,6 +1468,33 @@ int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attri
   return 0;
 }
 
+int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuFuncSetAttribute (hfunc, attrib, value);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -1764,6 +1792,35 @@ int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
   return 0;
 }
 
+int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuCtxSetCacheConfig (config);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+
+
 // OpenCL
 
 int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -5398,6 +5455,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (rc_cuCtxSetCurrent == -1) return -1;
 
+      // bcrypt optimization?
+      //const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED);
+      //
+      //if (rc_cuCtxSetCacheConfig == -1) return -1;
+
       const bool has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");
 
       device_param->has_bfe = has_bfe;

From 6db4ab7e602d72fc7df54ca3b6a79b24883b68e0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 11:11:52 +0200
Subject: [PATCH 45/73] Fix scrypt based algorithms to work on CUDA

---
 OpenCL/m08900-pure.cl | 138 ++++++++++++++++++++++++++++++------------
 OpenCL/m15700-pure.cl | 138 ++++++++++++++++++++++++++++++------------
 2 files changed, 198 insertions(+), 78 deletions(-)

diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl
index a2b476e97..d3d3339ce 100644
--- a/OpenCL/m08900-pure.cl
+++ b/OpenCL/m08900-pure.cl
@@ -24,6 +24,23 @@ typedef struct
 
 } scrypt_tmp_t;
 
+#ifdef IS_CUDA
+// CUDA provides no component-wise operators or rotate() for uint4; define the subset this kernel uses.
+inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
+inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
+inline __device__ uint4 operator >> (const uint4  a, const u32   b) { return make_uint4 ((a.x >> b  ), (a.y >> b  ), (a.z >> b  ), (a.w >> b  ));  }
+inline __device__ uint4 operator +  (const uint4  a, const uint4 b) { return make_uint4 ((a.x +  b.x), (a.y +  b.y), (a.z +  b.z), (a.w +  b.w));  }
+inline __device__ uint4 operator ^  (const uint4  a, const uint4 b) { return make_uint4 ((a.x ^  b.x), (a.y ^  b.y), (a.z ^  b.z), (a.w ^  b.w));  }
+inline __device__ uint4 operator |  (const uint4  a, const uint4 b) { return make_uint4 ((a.x |  b.x), (a.y |  b.y), (a.z |  b.z), (a.w |  b.w));  }
+inline __device__ uint4 operator ^= (      uint4 &a, const uint4 b) {                     a.x ^= b.x;   a.y ^= b.y;   a.z ^= b.z;   a.w ^= b.w;   return a; }
+// Stand-in for OpenCL rotate(): rotates each 32-bit lane LEFT by n bits (callers in this file pass 0 < n < 32).
+inline __device__ uint4 rotate (const uint4 a, const int n)
+{
+  return ((a << n) | ((a >> (32 - n))));
+}
+
+#endif
+
 DECLSPEC uint4 hc_swap32_4 (uint4 v)
 {
   return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00),  8u));
@@ -40,26 +57,50 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#define SALSA20_2R()                \
-{                                   \
-  ADD_ROTATE_XOR (X1, X0, X3,  7);  \
-  ADD_ROTATE_XOR (X2, X1, X0,  9);  \
-  ADD_ROTATE_XOR (X3, X2, X1, 13);  \
-  ADD_ROTATE_XOR (X0, X3, X2, 18);  \
-                                    \
-  X1 = X1.s3012;                    \
-  X2 = X2.s2301;                    \
-  X3 = X3.s1230;                    \
-                                    \
-  ADD_ROTATE_XOR (X3, X0, X1,  7);  \
-  ADD_ROTATE_XOR (X2, X3, X0,  9);  \
-  ADD_ROTATE_XOR (X1, X2, X3, 13);  \
-  ADD_ROTATE_XOR (X0, X1, X2, 18);  \
-                                    \
-  X1 = X1.s1230;                    \
-  X2 = X2.s2301;                    \
-  X3 = X3.s3012;                    \
+#ifdef IS_CUDA
+
+#define SALSA20_2R()                        \
+{                                           \
+  ADD_ROTATE_XOR (X1, X0, X3,  7);          \
+  ADD_ROTATE_XOR (X2, X1, X0,  9);          \
+  ADD_ROTATE_XOR (X3, X2, X1, 13);          \
+  ADD_ROTATE_XOR (X0, X3, X2, 18);          \
+                                            \
+  X1 = make_uint4 (X1.w, X1.x, X1.y, X1.z); \
+  X2 = make_uint4 (X2.z, X2.w, X2.x, X2.y); \
+  X3 = make_uint4 (X3.y, X3.z, X3.w, X3.x); \
+                                            \
+  ADD_ROTATE_XOR (X3, X0, X1,  7);          \
+  ADD_ROTATE_XOR (X2, X3, X0,  9);          \
+  ADD_ROTATE_XOR (X1, X2, X3, 13);          \
+  ADD_ROTATE_XOR (X0, X1, X2, 18);          \
+                                            \
+  X1 = make_uint4 (X1.y, X1.z, X1.w, X1.x); \
+  X2 = make_uint4 (X2.z, X2.w, X2.x, X2.y); \
+  X3 = make_uint4 (X3.w, X3.x, X3.y, X3.z); \
 }
+#else
+#define SALSA20_2R()                        \
+{                                           \
+  ADD_ROTATE_XOR (X1, X0, X3,  7);          \
+  ADD_ROTATE_XOR (X2, X1, X0,  9);          \
+  ADD_ROTATE_XOR (X3, X2, X1, 13);          \
+  ADD_ROTATE_XOR (X0, X3, X2, 18);          \
+                                            \
+  X1 = X1.s3012;                            \
+  X2 = X2.s2301;                            \
+  X3 = X3.s1230;                            \
+                                            \
+  ADD_ROTATE_XOR (X3, X0, X1,  7);          \
+  ADD_ROTATE_XOR (X2, X3, X0,  9);          \
+  ADD_ROTATE_XOR (X1, X2, X3, 13);          \
+  ADD_ROTATE_XOR (X0, X1, X2, 18);          \
+                                            \
+  X1 = X1.s1230;                            \
+  X2 = X2.s2301;                            \
+  X3 = X3.s3012;                            \
+}
+#endif
 
 #define SALSA20_8_XOR() \
 {                       \
@@ -164,10 +205,17 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
   #endif
   for (u32 i = 0; i < STATE_CNT4; i += 4)
   {
+    #ifdef IS_CUDA
+    T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
+    T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
+    T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
+    T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
+    #else
     T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
     T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
     T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
     T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
+    #endif
 
     X[i + 0] = T[0];
     X[i + 1] = T[1];
@@ -204,10 +252,17 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
   #endif
   for (u32 i = 0; i < STATE_CNT4; i += 4)
   {
+    #ifdef IS_CUDA
+    T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
+    T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
+    T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
+    T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
+    #else
     T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
     T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
     T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
     T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
+    #endif
 
     X[i + 0] = T[0];
     X[i + 1] = T[1];
@@ -273,8 +328,13 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
     digest[6] = sha256_hmac_ctx2.opad.h[6];
     digest[7] = sha256_hmac_ctx2.opad.h[7];
 
+    #ifdef IS_CUDA
+    const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
+    const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
+    #else
     const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
     const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
+    #endif
 
     tmps[gid].P[k + 0] = tmp0;
     tmps[gid].P[k + 1] = tmp1;
@@ -287,10 +347,10 @@ KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= gid_max) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = d_extra3_buf;
+  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
+  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
+  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
+  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
 
   uint4 X[STATE_CNT4];
   uint4 T[STATE_CNT4];
@@ -349,31 +409,31 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
 
     tmp = tmps[gid].P[l + 0];
 
-    w0[0] = tmp.s0;
-    w0[1] = tmp.s1;
-    w0[2] = tmp.s2;
-    w0[3] = tmp.s3;
+    w0[0] = tmp.x;
+    w0[1] = tmp.y;
+    w0[2] = tmp.z;
+    w0[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 1];
 
-    w1[0] = tmp.s0;
-    w1[1] = tmp.s1;
-    w1[2] = tmp.s2;
-    w1[3] = tmp.s3;
+    w1[0] = tmp.x;
+    w1[1] = tmp.y;
+    w1[2] = tmp.z;
+    w1[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 2];
 
-    w2[0] = tmp.s0;
-    w2[1] = tmp.s1;
-    w2[2] = tmp.s2;
-    w2[3] = tmp.s3;
+    w2[0] = tmp.x;
+    w2[1] = tmp.y;
+    w2[2] = tmp.z;
+    w2[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 3];
 
-    w3[0] = tmp.s0;
-    w3[1] = tmp.s1;
-    w3[2] = tmp.s2;
-    w3[3] = tmp.s3;
+    w3[0] = tmp.x;
+    w3[1] = tmp.y;
+    w3[2] = tmp.z;
+    w3[3] = tmp.w;
 
     sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
   }
diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl
index 3e12e7100..2bd7b709c 100644
--- a/OpenCL/m15700-pure.cl
+++ b/OpenCL/m15700-pure.cl
@@ -24,6 +24,23 @@ typedef struct
 
 } scrypt_tmp_t;
 
+#ifdef IS_CUDA
+
+inline __device__ uint4 operator &  (const uint4  a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  ));  }
+inline __device__ uint4 operator << (const uint4  a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  ));  }
+inline __device__ uint4 operator >> (const uint4  a, const u32   b) { return make_uint4 ((a.x >> b  ), (a.y >> b  ), (a.z >> b  ), (a.w >> b  ));  }
+inline __device__ uint4 operator +  (const uint4  a, const uint4 b) { return make_uint4 ((a.x +  b.x), (a.y +  b.y), (a.z +  b.z), (a.w +  b.w));  }
+inline __device__ uint4 operator ^  (const uint4  a, const uint4 b) { return make_uint4 ((a.x ^  b.x), (a.y ^  b.y), (a.z ^  b.z), (a.w ^  b.w));  }
+inline __device__ uint4 operator |  (const uint4  a, const uint4 b) { return make_uint4 ((a.x |  b.x), (a.y |  b.y), (a.z |  b.z), (a.w |  b.w));  }
+inline __device__ uint4 operator ^= (      uint4 &a, const uint4 b) {                     a.x ^= b.x;   a.y ^= b.y;   a.z ^= b.z;   a.w ^= b.w;   return a; } // lane-wise XOR-assign; returns the updated value (declared non-void, so it must not fall off the end)
+
+inline __device__ uint4 rotate (const uint4 a, const int n)
+{
+  return ((a << n) | ((a >> (32 - n)))); // rotate each 32-bit lane LEFT by n, as OpenCL rotate() does; n must not be 0 (32 - n would shift by 32) - ADD_ROTATE_XOR passes 7, 9, 13, 18
+}
+
+#endif
+
 typedef struct ethereum_scrypt
 {
   u32 salt_buf[16];
@@ -47,26 +64,50 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
 
 #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
 
-#define SALSA20_2R()                \
-{                                   \
-  ADD_ROTATE_XOR (X1, X0, X3,  7);  \
-  ADD_ROTATE_XOR (X2, X1, X0,  9);  \
-  ADD_ROTATE_XOR (X3, X2, X1, 13);  \
-  ADD_ROTATE_XOR (X0, X3, X2, 18);  \
-                                    \
-  X1 = X1.s3012;                    \
-  X2 = X2.s2301;                    \
-  X3 = X3.s1230;                    \
-                                    \
-  ADD_ROTATE_XOR (X3, X0, X1,  7);  \
-  ADD_ROTATE_XOR (X2, X3, X0,  9);  \
-  ADD_ROTATE_XOR (X1, X2, X3, 13);  \
-  ADD_ROTATE_XOR (X0, X1, X2, 18);  \
-                                    \
-  X1 = X1.s1230;                    \
-  X2 = X2.s2301;                    \
-  X3 = X3.s3012;                    \
+#ifdef IS_CUDA
+
+#define SALSA20_2R()                        \
+{                                           \
+  ADD_ROTATE_XOR (X1, X0, X3,  7);          \
+  ADD_ROTATE_XOR (X2, X1, X0,  9);          \
+  ADD_ROTATE_XOR (X3, X2, X1, 13);          \
+  ADD_ROTATE_XOR (X0, X3, X2, 18);          \
+                                            \
+  X1 = make_uint4 (X1.w, X1.x, X1.y, X1.z); \
+  X2 = make_uint4 (X2.z, X2.w, X2.x, X2.y); \
+  X3 = make_uint4 (X3.y, X3.z, X3.w, X3.x); \
+                                            \
+  ADD_ROTATE_XOR (X3, X0, X1,  7);          \
+  ADD_ROTATE_XOR (X2, X3, X0,  9);          \
+  ADD_ROTATE_XOR (X1, X2, X3, 13);          \
+  ADD_ROTATE_XOR (X0, X1, X2, 18);          \
+                                            \
+  X1 = make_uint4 (X1.y, X1.z, X1.w, X1.x); \
+  X2 = make_uint4 (X2.z, X2.w, X2.x, X2.y); \
+  X3 = make_uint4 (X3.w, X3.x, X3.y, X3.z); \
 }
+#else
+#define SALSA20_2R()                        \
+{                                           \
+  ADD_ROTATE_XOR (X1, X0, X3,  7);          \
+  ADD_ROTATE_XOR (X2, X1, X0,  9);          \
+  ADD_ROTATE_XOR (X3, X2, X1, 13);          \
+  ADD_ROTATE_XOR (X0, X3, X2, 18);          \
+                                            \
+  X1 = X1.s3012;                            \
+  X2 = X2.s2301;                            \
+  X3 = X3.s1230;                            \
+                                            \
+  ADD_ROTATE_XOR (X3, X0, X1,  7);          \
+  ADD_ROTATE_XOR (X2, X3, X0,  9);          \
+  ADD_ROTATE_XOR (X1, X2, X3, 13);          \
+  ADD_ROTATE_XOR (X0, X1, X2, 18);          \
+                                            \
+  X1 = X1.s1230;                            \
+  X2 = X2.s2301;                            \
+  X3 = X3.s3012;                            \
+}
+#endif
 
 #define SALSA20_8_XOR() \
 {                       \
@@ -171,10 +212,17 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
   #endif
   for (u32 i = 0; i < STATE_CNT4; i += 4)
   {
+    #ifdef IS_CUDA
+    T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
+    T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
+    T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
+    T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
+    #else
     T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
     T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
     T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
     T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
+    #endif
 
     X[i + 0] = T[0];
     X[i + 1] = T[1];
@@ -211,10 +259,17 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
   #endif
   for (u32 i = 0; i < STATE_CNT4; i += 4)
   {
+    #ifdef IS_CUDA
+    T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
+    T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
+    T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
+    T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
+    #else
     T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
     T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
     T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
     T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
+    #endif
 
     X[i + 0] = T[0];
     X[i + 1] = T[1];
@@ -411,8 +466,13 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
     digest[6] = sha256_hmac_ctx2.opad.h[6];
     digest[7] = sha256_hmac_ctx2.opad.h[7];
 
+    #ifdef IS_CUDA
+    const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
+    const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
+    #else
     const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
     const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
+    #endif
 
     tmps[gid].P[k + 0] = tmp0;
     tmps[gid].P[k + 1] = tmp1;
@@ -425,10 +485,10 @@ KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
 
   if (gid >= gid_max) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = d_extra3_buf;
+  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
+  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
+  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
+  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
 
   uint4 X[STATE_CNT4];
   uint4 T[STATE_CNT4];
@@ -487,31 +547,31 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
 
     tmp = tmps[gid].P[l + 0];
 
-    w0[0] = tmp.s0;
-    w0[1] = tmp.s1;
-    w0[2] = tmp.s2;
-    w0[3] = tmp.s3;
+    w0[0] = tmp.x;
+    w0[1] = tmp.y;
+    w0[2] = tmp.z;
+    w0[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 1];
 
-    w1[0] = tmp.s0;
-    w1[1] = tmp.s1;
-    w1[2] = tmp.s2;
-    w1[3] = tmp.s3;
+    w1[0] = tmp.x;
+    w1[1] = tmp.y;
+    w1[2] = tmp.z;
+    w1[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 2];
 
-    w2[0] = tmp.s0;
-    w2[1] = tmp.s1;
-    w2[2] = tmp.s2;
-    w2[3] = tmp.s3;
+    w2[0] = tmp.x;
+    w2[1] = tmp.y;
+    w2[2] = tmp.z;
+    w2[3] = tmp.w;
 
     tmp = tmps[gid].P[l + 3];
 
-    w3[0] = tmp.s0;
-    w3[1] = tmp.s1;
-    w3[2] = tmp.s2;
-    w3[3] = tmp.s3;
+    w3[0] = tmp.x;
+    w3[1] = tmp.y;
+    w3[2] = tmp.z;
+    w3[3] = tmp.w;
 
     sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
   }

From ec4d4218c03e03c20d65152f75642fdf98695723 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 12:59:36 +0200
Subject: [PATCH 46/73] Add some missing operators for vector types

---
 OpenCL/inc_types.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 44c913699..a1bb0247b 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -128,6 +128,15 @@ inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;     }
 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
 
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;     }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;     }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;     }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
+
 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   );  }
 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1));  }
 
@@ -172,6 +181,15 @@ inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;     }
 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
 
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;     }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;     }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;     }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
+
 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   );  }
 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1));  }
 
@@ -276,6 +294,15 @@ inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
 
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
+
 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
 
@@ -320,6 +347,15 @@ inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
 
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
+
 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
 
@@ -440,6 +476,15 @@ inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
 
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;     }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7;  }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;     }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7;  }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;     }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7;  }
+
 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
 
@@ -484,6 +529,15 @@ inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
 
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;     }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7;  }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;     }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7;  }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;     }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7;  }
+
 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
 
@@ -636,6 +690,15 @@ inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
 
+inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;    a.s8 *= b;    a.s9 *= b;    a.sa *= b;    a.sb *= b;    a.sc *= b;    a.sd *= b;    a.se *= b;    a.sf *= b;    }
+inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
+
+inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;    a.s8 >>= b;    a.s9 >>= b;    a.sa >>= b;    a.sb >>= b;    a.sc >>= b;    a.sd >>= b;    a.se >>= b;    a.sf >>= b;    }
+inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
+
+inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;    a.s8 <<= b;    a.s9 <<= b;    a.sa <<= b;    a.sb <<= b;    a.sc <<= b;    a.sd <<= b;    a.se <<= b;    a.sf <<= b;    }
+inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
+
 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
 
@@ -680,6 +743,15 @@ inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1
 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
 
+inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;    a.s8 *= b;    a.s9 *= b;    a.sa *= b;    a.sb *= b;    a.sc *= b;    a.sd *= b;    a.se *= b;    a.sf *= b;    }
+inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
+
+inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;    a.s8 >>= b;    a.s9 >>= b;    a.sa >>= b;    a.sb >>= b;    a.sc >>= b;    a.sd >>= b;    a.se >>= b;    a.sf >>= b;    }
+inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
+
+inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;    a.s8 <<= b;    a.s9 <<= b;    a.sa <<= b;    a.sb <<= b;    a.sc <<= b;    a.sd <<= b;    a.se <<= b;    a.sf <<= b;    }
+inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
+
 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
 

From 82927c13c8cd61f43d6e3e5756d06c4eeb9abcab Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 13:09:27 +0200
Subject: [PATCH 47/73] Get rid of uchar4 in -m 9100

---
 OpenCL/m09100-pure.cl | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl
index 38f6fc97a..8ccc65020 100644
--- a/OpenCL/m09100-pure.cl
+++ b/OpenCL/m09100-pure.cl
@@ -296,13 +296,6 @@ DECLSPEC void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_pl
 
 DECLSPEC void lotus6_base64_encode (u8 *base64_hash, const u32 salt0, const u32 salt1, const u32 a, const u32 b, const u32 c)
 {
-  const uchar4 salt0c = as_uchar4 (salt0);
-  const uchar4 salt1c = as_uchar4 (salt1);
-
-  const uchar4 ac = as_uchar4 (a);
-  const uchar4 bc = as_uchar4 (b);
-  const uchar4 cc = as_uchar4 (c);
-
   u8 tmp[24]; // size 22 (=pw_len) is needed but base64 needs size divisible by 4
 
   /*
@@ -311,23 +304,23 @@ DECLSPEC void lotus6_base64_encode (u8 *base64_hash, const u32 salt0, const u32
 
   u8 base64_plain[16];
 
-  base64_plain[ 0] = salt0c.s0;
-  base64_plain[ 1] = salt0c.s1;
-  base64_plain[ 2] = salt0c.s2;
-  base64_plain[ 3] = salt0c.s3;
+  base64_plain[ 0] = unpack_v8a_from_v32_S (salt0);
+  base64_plain[ 1] = unpack_v8b_from_v32_S (salt0);
+  base64_plain[ 2] = unpack_v8c_from_v32_S (salt0);
+  base64_plain[ 3] = unpack_v8d_from_v32_S (salt0);
   base64_plain[ 3] -= -4; // dont ask!
-  base64_plain[ 4] = salt1c.s0;
-  base64_plain[ 5] = ac.s0;
-  base64_plain[ 6] = ac.s1;
-  base64_plain[ 7] = ac.s2;
-  base64_plain[ 8] = ac.s3;
-  base64_plain[ 9] = bc.s0;
-  base64_plain[10] = bc.s1;
-  base64_plain[11] = bc.s2;
-  base64_plain[12] = bc.s3;
-  base64_plain[13] = cc.s0;
-  base64_plain[14] = cc.s1;
-  base64_plain[15] = cc.s2;
+  base64_plain[ 4] = unpack_v8a_from_v32_S (salt1);
+  base64_plain[ 5] = unpack_v8a_from_v32_S (a);
+  base64_plain[ 6] = unpack_v8b_from_v32_S (a);
+  base64_plain[ 7] = unpack_v8c_from_v32_S (a);
+  base64_plain[ 8] = unpack_v8d_from_v32_S (a);
+  base64_plain[ 9] = unpack_v8a_from_v32_S (b);
+  base64_plain[10] = unpack_v8b_from_v32_S (b);
+  base64_plain[11] = unpack_v8c_from_v32_S (b);
+  base64_plain[12] = unpack_v8d_from_v32_S (b);
+  base64_plain[13] = unpack_v8a_from_v32_S (c);
+  base64_plain[14] = unpack_v8b_from_v32_S (c);
+  base64_plain[15] = unpack_v8c_from_v32_S (c);
 
   /*
    * base64 encode the $salt.$digest string

From 39e150fc1edf349846ddb236a13623126525a5f4 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 14:37:14 +0200
Subject: [PATCH 48/73] Use xxx_v2 CUDA symbols

---
 src/backend.c | 125 +++++++++++++++++++++++++++++---------------------
 1 file changed, 72 insertions(+), 53 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 97c9d6b6e..7639e63c2 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -879,59 +879,78 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
 
   if (cuda->lib == NULL) return -1;
 
-  HC_LOAD_FUNC (cuda, cuCtxCreate,              CUDA_CUCTXCREATE,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxDestroy,             CUDA_CUCTXDESTROY,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetCacheConfig,      CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetCurrent,          CUDA_CUCTXGETCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig,  CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxPopCurrent,          CUDA_CUCTXPOPCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxPushCurrent,         CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSetCacheConfig,      CUDA_CUCTXSETCACHECONFIG,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSetCurrent,          CUDA_CUCTXSETCURRENT,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig,  CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuCtxSynchronize,         CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetAttribute,     CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetCount,         CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGet,              CUDA_CUDEVICEGET,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceGetName,          CUDA_CUDEVICEGETNAME,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDeviceTotalMem,         CUDA_CUDEVICETOTALMEM,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuDriverGetVersion,       CUDA_CUDRIVERGETVERSION,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventCreate,            CUDA_CUEVENTCREATE,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventDestroy,           CUDA_CUEVENTDESTROY,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventElapsedTime,       CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventQuery,             CUDA_CUEVENTQUERY,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventRecord,            CUDA_CUEVENTRECORD,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuEventSynchronize,       CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncGetAttribute,       CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetAttribute,       CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetCacheConfig,     CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuGetErrorName,           CUDA_CUGETERRORNAME,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuGetErrorString,         CUDA_CUGETERRORSTRING,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuInit,                   CUDA_CUINIT,                    CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuLaunchKernel,           CUDA_CULAUNCHKERNEL,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemAlloc,               CUDA_CUMEMALLOC,                CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemAllocHost,           CUDA_CUMEMALLOCHOST,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyDtoD,             CUDA_CUMEMCPYDTOD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyDtoH,             CUDA_CUMEMCPYDTOH,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemcpyHtoD,             CUDA_CUMEMCPYHTOD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemFree,                CUDA_CUMEMFREE,                 CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemFreeHost,            CUDA_CUMEMFREEHOST,             CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemGetInfo,             CUDA_CUMEMGETINFO,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemsetD32,              CUDA_CUMEMSETD32,               CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuMemsetD8,               CUDA_CUMEMSETD8,                CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleGetFunction,      CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleGetGlobal,        CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoad,             CUDA_CUMODULELOAD,              CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoadData,         CUDA_CUMODULELOADDATA,          CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleLoadDataEx,       CUDA_CUMODULELOADDATAEX,        CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuModuleUnload,           CUDA_CUMODULEUNLOAD,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuProfilerStart,          CUDA_CUPROFILERSTART,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuProfilerStop,           CUDA_CUPROFILERSTOP,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamCreate,           CUDA_CUSTREAMCREATE,            CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamDestroy,          CUDA_CUSTREAMDESTROY,           CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamSynchronize,      CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
-  HC_LOAD_FUNC (cuda, cuStreamWaitEvent,        CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
+  #define HC_LOAD_FUNC_CUDA(ptr,name,cudaname,type,libname,noerr) \
+    ptr->name = (type) hc_dlsym (ptr->lib, #cudaname); \
+    if (noerr != -1) { \
+      if (!ptr->name) { \
+        if (noerr == 1) { \
+          event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+          return -1; \
+        } \
+        if (noerr != 1) { \
+          event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+          return 0; \
+        } \
+      } \
+    }
+
+  // finding the right symbol is a PITA, because of the _v2 suffix
+  // a good reference is cuda.h itself
+  // this needs to be verified for each new cuda release
+
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxCreate,              cuCtxCreate_v2,            CUDA_CUCTXCREATE,               CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxDestroy,             cuCtxDestroy_v2,           CUDA_CUCTXDESTROY,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCacheConfig,      cuCtxGetCacheConfig,       CUDA_CUCTXGETCACHECONFIG,       CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCurrent,          cuCtxGetCurrent,           CUDA_CUCTXGETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetSharedMemConfig,  cuCtxGetSharedMemConfig,   CUDA_CUCTXGETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxPopCurrent,          cuCtxPopCurrent_v2,        CUDA_CUCTXPOPCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxPushCurrent,         cuCtxPushCurrent_v2,       CUDA_CUCTXPUSHCURRENT,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCacheConfig,      cuCtxSetCacheConfig,       CUDA_CUCTXSETCACHECONFIG,       CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCurrent,          cuCtxSetCurrent,           CUDA_CUCTXSETCURRENT,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetSharedMemConfig,  cuCtxSetSharedMemConfig,   CUDA_CUCTXSETSHAREDMEMCONFIG,   CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSynchronize,         cuCtxSynchronize,          CUDA_CUCTXSYNCHRONIZE,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetAttribute,     cuDeviceGetAttribute,      CUDA_CUDEVICEGETATTRIBUTE,      CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetCount,         cuDeviceGetCount,          CUDA_CUDEVICEGETCOUNT,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGet,              cuDeviceGet,               CUDA_CUDEVICEGET,               CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetName,          cuDeviceGetName,           CUDA_CUDEVICEGETNAME,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceTotalMem,         cuDeviceTotalMem_v2,       CUDA_CUDEVICETOTALMEM,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDriverGetVersion,       cuDriverGetVersion,        CUDA_CUDRIVERGETVERSION,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventCreate,            cuEventCreate,             CUDA_CUEVENTCREATE,             CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventDestroy,           cuEventDestroy_v2,         CUDA_CUEVENTDESTROY,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventElapsedTime,       cuEventElapsedTime,        CUDA_CUEVENTELAPSEDTIME,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventQuery,             cuEventQuery,              CUDA_CUEVENTQUERY,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventRecord,            cuEventRecord,             CUDA_CUEVENTRECORD,             CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventSynchronize,       cuEventSynchronize,        CUDA_CUEVENTSYNCHRONIZE,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncGetAttribute,       cuFuncGetAttribute,        CUDA_CUFUNCGETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetAttribute,       cuFuncSetAttribute,        CUDA_CUFUNCSETATTRIBUTE,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetCacheConfig,     cuFuncSetCacheConfig,      CUDA_CUFUNCSETCACHECONFIG,      CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetSharedMemConfig, cuFuncSetSharedMemConfig,  CUDA_CUFUNCSETSHAREDMEMCONFIG,  CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuGetErrorName,           cuGetErrorName,            CUDA_CUGETERRORNAME,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuGetErrorString,         cuGetErrorString,          CUDA_CUGETERRORSTRING,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuInit,                   cuInit,                    CUDA_CUINIT,                    CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuLaunchKernel,           cuLaunchKernel,            CUDA_CULAUNCHKERNEL,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemAlloc,               cuMemAlloc_v2,             CUDA_CUMEMALLOC,                CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemAllocHost,           cuMemAllocHost_v2,         CUDA_CUMEMALLOCHOST,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoD,             cuMemcpyDtoD_v2,           CUDA_CUMEMCPYDTOD,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoH,             cuMemcpyDtoH_v2,           CUDA_CUMEMCPYDTOH,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoD,             cuMemcpyHtoD_v2,           CUDA_CUMEMCPYHTOD,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemFree,                cuMemFree_v2,              CUDA_CUMEMFREE,                 CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost,            cuMemFreeHost,             CUDA_CUMEMFREEHOST,             CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo,             cuMemGetInfo_v2,           CUDA_CUMEMGETINFO,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32,              cuMemsetD32_v2,            CUDA_CUMEMSETD32,               CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8,               cuMemsetD8_v2,             CUDA_CUMEMSETD8,                CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction,      cuModuleGetFunction,       CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal,        cuModuleGetGlobal_v2,      CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad,             cuModuleLoad,              CUDA_CUMODULELOAD,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadData,         cuModuleLoadData,          CUDA_CUMODULELOADDATA,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadDataEx,       cuModuleLoadDataEx,        CUDA_CUMODULELOADDATAEX,        CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleUnload,           cuModuleUnload,            CUDA_CUMODULEUNLOAD,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuProfilerStart,          cuProfilerStart,           CUDA_CUPROFILERSTART,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuProfilerStop,           cuProfilerStop,            CUDA_CUPROFILERSTOP,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamCreate,           cuStreamCreate,            CUDA_CUSTREAMCREATE,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamDestroy,          cuStreamDestroy_v2,        CUDA_CUSTREAMDESTROY,           CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamSynchronize,      cuStreamSynchronize,       CUDA_CUSTREAMSYNCHRONIZE,       CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamWaitEvent,        cuStreamWaitEvent,         CUDA_CUSTREAMWAITEVENT,         CUDA, 1);
 
   return 0;
 }

From be8f29ca39c68dfe81777a45b7a07778eee8853a Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 16:30:08 +0200
Subject: [PATCH 49/73] Only warn about broken NVIDIA driver

---
 src/backend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/backend.c b/src/backend.c
index 7639e63c2..75fe65270 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6227,7 +6227,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (nv_warn == true)
                 {
-                  event_log_error (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version);
+                  event_log_warning (hashcat_ctx, "* Device #%u: Outdated or broken NVIDIA driver '%s' detected!", device_id + 1, device_param->opencl_driver_version);
+                  event_log_warning (hashcat_ctx, NULL);
 
                   event_log_warning (hashcat_ctx, "You are STRONGLY encouraged to use the officially supported NVIDIA driver.");
                   event_log_warning (hashcat_ctx, "See hashcat's homepage for officially supported NVIDIA drivers.");

From ce20a5ab6b88d7f33390d80bbcc96ff433adac93 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 16:55:48 +0200
Subject: [PATCH 50/73] Fix uint4 rotate in scrypt based kernels for CUDA

---
 OpenCL/m08900-pure.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl
index d3d3339ce..c5242d18e 100644
--- a/OpenCL/m08900-pure.cl
+++ b/OpenCL/m08900-pure.cl
@@ -36,7 +36,7 @@ inline __device__ uint4 operator ^= (      uint4 &a, const uint4 b) {
 
 inline __device__ uint4 rotate (const uint4 a, const int n)
 {
-  return ((a >> n) | ((a >> (32 - n))));
+  return ((a << n) | ((a >> (32 - n))));
 }
 
 #endif

From a2b5981303ee4ad91a2660e9b0a347c718762507 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 21:20:50 +0200
Subject: [PATCH 51/73] Fix some library names

---
 src/backend.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 75fe65270..af9c47c2d 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -685,7 +685,7 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
   #if   defined (_WIN)
   nvrtc->lib = hc_dlopen ("nvrtc.dll");
   #elif defined (__APPLE__)
-  nvrtc->lib = hc_dlopen ("/System/Library/Frameworks/NVRTC.framework/NVRTC");
+  nvrtc->lib = hc_dlopen ("nvrtc.dylib");
   #elif defined (__CYGWIN__)
   nvrtc->lib = hc_dlopen ("nvrtc.dll");
   #else
@@ -866,11 +866,11 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
   memset (cuda, 0, sizeof (CUDA_PTR));
 
   #if   defined (_WIN)
-  cuda->lib = hc_dlopen ("cuda.dll");
+  cuda->lib = hc_dlopen ("nvcuda.dll");
   #elif defined (__APPLE__)
-  cuda->lib = hc_dlopen ("/System/Library/Frameworks/CUDA.framework/CUDA");
+  cuda->lib = hc_dlopen ("nvcuda.dylib");
   #elif defined (__CYGWIN__)
-  cuda->lib = hc_dlopen ("cuda.dll");
+  cuda->lib = hc_dlopen ("nvcuda.dll");
   #else
   cuda->lib = hc_dlopen ("libcuda.so");
 
@@ -5487,7 +5487,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->has_lop3 = has_lop3;
 
-      const bool has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
+      const bool has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");
 
       device_param->has_mov64 = has_mov64;
 

From 54feb62e94a43d1bdda5ce7d29a82ff2f46072f9 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 9 May 2019 22:17:13 +0200
Subject: [PATCH 52/73] brute-force nvrtc .dll name

---
 src/backend.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/backend.c b/src/backend.c
index af9c47c2d..d07e72acf 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -684,6 +684,36 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
 
   #if   defined (_WIN)
   nvrtc->lib = hc_dlopen ("nvrtc.dll");
+
+  if (nvrtc->lib == NULL)
+  {
+    // super annoying: nvidia is using the CUDA version in nvrtc???.dll filename!
+    // however, the cuda version string comes from nvcuda.dll which is from nvidia driver, but
+    // the driver version and the installed CUDA toolkit version can be different, so it cannot be used as a reference.
+    // brute force to the rescue
+
+    char dllname[100];
+
+    for (int major = 20; major >= 0; major--)
+    {
+      for (int minor = 20; minor >= 0; minor--)
+      {
+        snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d.dll", major, minor);
+
+        nvrtc->lib = hc_dlopen (dllname);
+
+        if (nvrtc->lib) break;
+
+        snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d_0.dll", major, minor);
+
+        nvrtc->lib = hc_dlopen (dllname);
+
+        if (nvrtc->lib) break;
+      }
+
+      if (nvrtc->lib) break;
+    }
+  }
   #elif defined (__APPLE__)
   nvrtc->lib = hc_dlopen ("nvrtc.dylib");
   #elif defined (__CYGWIN__)

From 5d14a5930469e34896ab073dfd3ef688e42ad8e2 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 10:11:12 +0200
Subject: [PATCH 53/73] Need 3.x nvrtc minimum

---
 src/backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend.c b/src/backend.c
index d07e72acf..9857dff0c 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -694,7 +694,7 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
 
     char dllname[100];
 
-    for (int major = 20; major >= 0; major--)
+    for (int major = 20; major >= 3; major--) // older than 3.x do not ship _v2 functions anyway
     {
       for (int minor = 20; minor >= 0; minor--)
       {

From 46f737c5afd371777f434411258101ff61b63723 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 13:22:26 +0200
Subject: [PATCH 54/73] Use real constant memory on CUDA

---
 OpenCL/inc_common.h    | 55 +++++++++++++++++++++++++++++++++--
 OpenCL/inc_platform.cl | 20 +++++++++++++
 src/backend.c          | 65 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 130 insertions(+), 10 deletions(-)

diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index bdcb16d38..e323d0e0a 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -26,6 +26,44 @@
  *   - P19: Type of the esalt_bufs structure with additional data, or void.
  */
 
+#ifdef IS_CUDA
+#define KERN_ATTR(p2,p4,p5,p6,p19)                              \
+  MAYBE_UNUSED GLOBAL_AS       pw_t          *pws,              \
+  MAYBE_UNUSED p2        const kernel_rule_t *g_rules_buf,      \
+  MAYBE_UNUSED GLOBAL_AS const pw_t          *combs_buf,        \
+  MAYBE_UNUSED p4,                                              \
+  MAYBE_UNUSED GLOBAL_AS p5                  *tmps,             \
+  MAYBE_UNUSED GLOBAL_AS p6                  *hooks,            \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s1_a, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s1_b, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s1_c, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s1_d, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s2_a, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s2_b, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s2_c, \
+  MAYBE_UNUSED GLOBAL_AS const u32           *bitmaps_buf_s2_d, \
+  MAYBE_UNUSED GLOBAL_AS       plain_t       *plains_buf,       \
+  MAYBE_UNUSED GLOBAL_AS const digest_t      *digests_buf,      \
+  MAYBE_UNUSED GLOBAL_AS       u32           *hashes_shown,     \
+  MAYBE_UNUSED GLOBAL_AS const salt_t        *salt_bufs,        \
+  MAYBE_UNUSED GLOBAL_AS const p19           *esalt_bufs,       \
+  MAYBE_UNUSED GLOBAL_AS       u32           *d_return_buf,     \
+  MAYBE_UNUSED GLOBAL_AS       void          *d_extra0_buf,     \
+  MAYBE_UNUSED GLOBAL_AS       void          *d_extra1_buf,     \
+  MAYBE_UNUSED GLOBAL_AS       void          *d_extra2_buf,     \
+  MAYBE_UNUSED GLOBAL_AS       void          *d_extra3_buf,     \
+  MAYBE_UNUSED           const u32            bitmap_mask,      \
+  MAYBE_UNUSED           const u32            bitmap_shift1,    \
+  MAYBE_UNUSED           const u32            bitmap_shift2,    \
+  MAYBE_UNUSED           const u32            salt_pos,         \
+  MAYBE_UNUSED           const u32            loop_pos,         \
+  MAYBE_UNUSED           const u32            loop_cnt,         \
+  MAYBE_UNUSED           const u32            il_cnt,           \
+  MAYBE_UNUSED           const u32            digests_cnt,      \
+  MAYBE_UNUSED           const u32            digests_offset,   \
+  MAYBE_UNUSED           const u32            combs_mode,       \
+  MAYBE_UNUSED           const u64            gid_max
+#else
 #define KERN_ATTR(p2,p4,p5,p6,p19)                              \
   MAYBE_UNUSED GLOBAL_AS       pw_t          *pws,              \
   MAYBE_UNUSED p2        const kernel_rule_t *rules_buf,        \
@@ -62,7 +100,7 @@
   MAYBE_UNUSED           const u32            digests_offset,   \
   MAYBE_UNUSED           const u32            combs_mode,       \
   MAYBE_UNUSED           const u64            gid_max
-
+#endif
 /*
  * Shortcut macros for usage in the actual kernels
  *
@@ -71,8 +109,20 @@
  * do not use rules or tmps, etc.
  */
 
+#ifdef IS_CUDA
+#define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       void, void, void)
+#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *g_words_buf_s, void, void, void)
+#define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       void, void, e)
+#define KERN_ATTR_RULES()         KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,       void, void, void)
+#define KERN_ATTR_RULES_ESALT(e)  KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,       void, void, e)
+#define KERN_ATTR_TMPS(t)         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    void, void)
+#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    void, e)
+#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    h,    void)
+#define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *g_words_buf_r, void, void, void)
+#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *g_words_buf_r, void, void, e)
+#else
 #define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     void, void, void)
-#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *words_buf_r, void, void, void)
+#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *words_buf_s, void, void, void)
 #define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     void, void, e)
 #define KERN_ATTR_RULES()         KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,     void, void, void)
 #define KERN_ATTR_RULES_ESALT(e)  KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,     void, void, e)
@@ -81,6 +131,7 @@
 #define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     t,    h,    void)
 #define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r, void, void, void)
 #define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r, void, void, e)
+#endif
 
 // union based packing
 
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index e5924dd13..3606804b4 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -13,6 +13,26 @@
 
 #ifdef IS_CUDA
 
+#if ATTACK_EXEC == 11 // NOTE(review): 11 presumably equals ATTACK_EXEC_INSIDE_KERNEL (host looks up "generic_constant" only in that mode) - confirm
+
+CONSTANT_VK u32 generic_constant[8192]; // 32k (8192 * 4 bytes, assuming u32 is 4 bytes wide); resolved by name from the host side
+
+#if   ATTACK_KERN == 0 // presumably straight mode (host maps its rules_c buffer here) - confirm
+#define rules_buf   ((const kernel_rule_t *) generic_constant)
+#define words_buf_s g_words_buf_s
+#define words_buf_r g_words_buf_r
+#elif ATTACK_KERN == 1 // nothing is placed in generic_constant here, all three names alias the g_* kernel arguments
+#define rules_buf   g_rules_buf
+#define words_buf_s g_words_buf_s
+#define words_buf_r g_words_buf_r
+#elif ATTACK_KERN == 3 // presumably brute-force mode (host maps its bfs_c and tm_c buffers here) - confirm
+#define rules_buf   g_rules_buf
+#define words_buf_s ((const bs_word_t *) generic_constant)
+#define words_buf_r ((const u32x *)      generic_constant)
+#endif // ATTACK_KERN; note that no aliases are defined for any other ATTACK_KERN value
+
+#endif // ATTACK_EXEC == 11
+
 DECLSPEC u32 atomic_dec (u32 *p)
 {
   return atomicSub (p, 1);
diff --git a/src/backend.c b/src/backend.c
index 9857dff0c..ddbaf6456 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1490,6 +1490,34 @@ int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmod
   return 0;
 }
 
+// Thin logging wrapper: forwards dptr, bytes, hmod and name unchanged to the loaded
+// cuModuleGetGlobal driver entry point. Returns 0 if the driver reports CUDA_SUCCESS,
+// otherwise logs the failure through event_log_error () and returns -1.
+int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name)
+{
+  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuModuleGetGlobal (dptr, bytes, hmod, name);
+
+  if (rc == CUDA_SUCCESS) return 0;
+
+  // prefer the driver's textual description, fall back to the numeric code
+
+  const char *err_str = NULL;
+
+  if (cuda->cuGetErrorString (rc, &err_str) != CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %d", rc);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %s", err_str);
+  }
+
+  return -1;
+}
+
+
 int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc)
 {
   backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@@ -7346,9 +7374,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // we don't have sm_* on vendors not NV but it doesn't matter
 
     #if defined (DEBUG)
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern);
     #else
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern);
     #endif
 
     build_options_buf[build_options_len] = 0;
@@ -8276,7 +8304,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
         {
           CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules,   size_rules);   if (CU_rc == -1) return -1;
-          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
+
+          if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+          {
+            size_t dummy;
+
+            CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
+          }
+          else
+          {
+            CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
+          }
 
           CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules); if (CU_rc == -1) return -1;
         }
@@ -8290,10 +8328,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
         {
           CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs,            size_bfs);        if (CU_rc == -1) return -1;
-          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c,          size_bfs);        if (CU_rc == -1) return -1;
-          CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c,           size_tm);         if (CU_rc == -1) return -1;
           CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf,   size_root_css);   if (CU_rc == -1) return -1;
           CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css); if (CU_rc == -1) return -1;
+
+          if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+          {
+            size_t dummy;
+
+            CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
+            CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_tm_c,  &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
+          }
+          else
+          {
+            CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c,          size_bfs);        if (CU_rc == -1) return -1;
+            CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c,           size_tm);         if (CU_rc == -1) return -1;
+          }
         }
       }
 
@@ -10665,11 +10714,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->cuda_d_pws_comp_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_comp_buf);
       if (device_param->cuda_d_pws_idx)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_idx);
       if (device_param->cuda_d_rules)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules);
-      if (device_param->cuda_d_rules_c)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c);
+      //if (device_param->cuda_d_rules_c)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c);
       if (device_param->cuda_d_combs)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs);
       if (device_param->cuda_d_combs_c)        hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs_c);
       if (device_param->cuda_d_bfs)            hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs);
-      if (device_param->cuda_d_bfs_c)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c);
+      //if (device_param->cuda_d_bfs_c)          hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c);
       if (device_param->cuda_d_bitmap_s1_a)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_a);
       if (device_param->cuda_d_bitmap_s1_b)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_b);
       if (device_param->cuda_d_bitmap_s1_c)    hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_c);
@@ -10692,7 +10741,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->cuda_d_extra3_buf)     hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra3_buf);
       if (device_param->cuda_d_root_css_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_root_css_buf);
       if (device_param->cuda_d_markov_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_markov_css_buf);
-      if (device_param->cuda_d_tm_c)           hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c);
+      //if (device_param->cuda_d_tm_c)           hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c);
       if (device_param->cuda_d_st_digests_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_digests_buf);
       if (device_param->cuda_d_st_salts_buf)   hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_salts_buf);
       if (device_param->cuda_d_st_esalts_buf)  hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_esalts_buf);

From 53be3e74a3c91e60d2478dd4e952b77755d1637f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 13:22:40 +0200
Subject: [PATCH 55/73] Rename some variables to avoid collisions

---
 OpenCL/m01500_a3-pure.cl | 128 +++++++++++++++++-----------------
 OpenCL/m03000_a3-pure.cl | 146 +++++++++++++++++++--------------------
 OpenCL/m14000_a3-pure.cl | 128 +++++++++++++++++-----------------
 3 files changed, 201 insertions(+), 201 deletions(-)

diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl
index 5c534cd4e..ca612828d 100644
--- a/OpenCL/m01500_a3-pure.cl
+++ b/OpenCL/m01500_a3-pure.cl
@@ -1886,7 +1886,7 @@ DECLSPEC void transpose32c (u32 *data)
 // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
 //
 
-KERNEL_FQ void m01500_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
+KERNEL_FQ void m01500_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b)
 {
   const u64 gid = get_global_id (0);
 
@@ -1902,13 +1902,13 @@ KERNEL_FQ void m01500_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
   #endif
   for (int i = 0, j = 0; i < 32; i += 8, j += 7)
   {
-    atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
   }
 }
 
@@ -2035,34 +2035,34 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
   u32 k26 = K26;
   u32 k27 = K27;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
 
   u32 D00 = 0;
   u32 D01 = 0;
@@ -2487,34 +2487,34 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
   u32 k26 = K26;
   u32 k27 = K27;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
 
   u32 D00 = 0;
   u32 D01 = 0;
diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl
index 8f1449780..682edabf4 100644
--- a/OpenCL/m03000_a3-pure.cl
+++ b/OpenCL/m03000_a3-pure.cl
@@ -1731,7 +1731,7 @@ DECLSPEC void transpose32c (u32 *data)
 // transpose bitslice mod  : attention race conditions, need different buffers for *in and *out
 //
 
-KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
+KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b)
 {
   const u64 gid = get_global_id (0);
 
@@ -1742,14 +1742,14 @@ KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
 
   for (int i = 0; i < 32; i += 8)
   {
-    atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
   }
 }
 
@@ -1871,38 +1871,38 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
   u32 k30 = K30;
   u32 k31 = K31;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
-  k28 |= words_buf_r[pc_pos].b[28];
-  k29 |= words_buf_r[pc_pos].b[29];
-  k30 |= words_buf_r[pc_pos].b[30];
-  k31 |= words_buf_r[pc_pos].b[31];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
+  k28 |= words_buf_s[pc_pos].b[28];
+  k29 |= words_buf_s[pc_pos].b[29];
+  k30 |= words_buf_s[pc_pos].b[30];
+  k31 |= words_buf_s[pc_pos].b[31];
 
   // KGS!@#$% including IP
 
@@ -2323,38 +2323,38 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
   u32 k30 = K30;
   u32 k31 = K31;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
-  k28 |= words_buf_r[pc_pos].b[28];
-  k29 |= words_buf_r[pc_pos].b[29];
-  k30 |= words_buf_r[pc_pos].b[30];
-  k31 |= words_buf_r[pc_pos].b[31];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
+  k28 |= words_buf_s[pc_pos].b[28];
+  k29 |= words_buf_s[pc_pos].b[29];
+  k30 |= words_buf_s[pc_pos].b[30];
+  k31 |= words_buf_s[pc_pos].b[31];
 
   // KGS!@#$% including IP
 
diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl
index cc98a5dce..7d1b33e8a 100644
--- a/OpenCL/m14000_a3-pure.cl
+++ b/OpenCL/m14000_a3-pure.cl
@@ -1731,7 +1731,7 @@ DECLSPEC void transpose32c (u32 *data)
 // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
 //
 
-KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
+KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b)
 {
   const u64 gid = get_global_id (0);
 
@@ -1745,13 +1745,13 @@ KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_r)
   #endif
   for (int i = 0, j = 0; i < 32; i += 8, j += 7)
   {
-    atomic_or (&words_buf_r[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice));
-    atomic_or (&words_buf_r[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice));
+    atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice));
   }
 }
 
@@ -2080,34 +2080,34 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
   u32 k26 = K26;
   u32 k27 = K27;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
 
   DES
   (
@@ -2596,34 +2596,34 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
   u32 k26 = K26;
   u32 k27 = K27;
 
-  k00 |= words_buf_r[pc_pos].b[ 0];
-  k01 |= words_buf_r[pc_pos].b[ 1];
-  k02 |= words_buf_r[pc_pos].b[ 2];
-  k03 |= words_buf_r[pc_pos].b[ 3];
-  k04 |= words_buf_r[pc_pos].b[ 4];
-  k05 |= words_buf_r[pc_pos].b[ 5];
-  k06 |= words_buf_r[pc_pos].b[ 6];
-  k07 |= words_buf_r[pc_pos].b[ 7];
-  k08 |= words_buf_r[pc_pos].b[ 8];
-  k09 |= words_buf_r[pc_pos].b[ 9];
-  k10 |= words_buf_r[pc_pos].b[10];
-  k11 |= words_buf_r[pc_pos].b[11];
-  k12 |= words_buf_r[pc_pos].b[12];
-  k13 |= words_buf_r[pc_pos].b[13];
-  k14 |= words_buf_r[pc_pos].b[14];
-  k15 |= words_buf_r[pc_pos].b[15];
-  k16 |= words_buf_r[pc_pos].b[16];
-  k17 |= words_buf_r[pc_pos].b[17];
-  k18 |= words_buf_r[pc_pos].b[18];
-  k19 |= words_buf_r[pc_pos].b[19];
-  k20 |= words_buf_r[pc_pos].b[20];
-  k21 |= words_buf_r[pc_pos].b[21];
-  k22 |= words_buf_r[pc_pos].b[22];
-  k23 |= words_buf_r[pc_pos].b[23];
-  k24 |= words_buf_r[pc_pos].b[24];
-  k25 |= words_buf_r[pc_pos].b[25];
-  k26 |= words_buf_r[pc_pos].b[26];
-  k27 |= words_buf_r[pc_pos].b[27];
+  k00 |= words_buf_s[pc_pos].b[ 0];
+  k01 |= words_buf_s[pc_pos].b[ 1];
+  k02 |= words_buf_s[pc_pos].b[ 2];
+  k03 |= words_buf_s[pc_pos].b[ 3];
+  k04 |= words_buf_s[pc_pos].b[ 4];
+  k05 |= words_buf_s[pc_pos].b[ 5];
+  k06 |= words_buf_s[pc_pos].b[ 6];
+  k07 |= words_buf_s[pc_pos].b[ 7];
+  k08 |= words_buf_s[pc_pos].b[ 8];
+  k09 |= words_buf_s[pc_pos].b[ 9];
+  k10 |= words_buf_s[pc_pos].b[10];
+  k11 |= words_buf_s[pc_pos].b[11];
+  k12 |= words_buf_s[pc_pos].b[12];
+  k13 |= words_buf_s[pc_pos].b[13];
+  k14 |= words_buf_s[pc_pos].b[14];
+  k15 |= words_buf_s[pc_pos].b[15];
+  k16 |= words_buf_s[pc_pos].b[16];
+  k17 |= words_buf_s[pc_pos].b[17];
+  k18 |= words_buf_s[pc_pos].b[18];
+  k19 |= words_buf_s[pc_pos].b[19];
+  k20 |= words_buf_s[pc_pos].b[20];
+  k21 |= words_buf_s[pc_pos].b[21];
+  k22 |= words_buf_s[pc_pos].b[22];
+  k23 |= words_buf_s[pc_pos].b[23];
+  k24 |= words_buf_s[pc_pos].b[24];
+  k25 |= words_buf_s[pc_pos].b[25];
+  k26 |= words_buf_s[pc_pos].b[26];
+  k27 |= words_buf_s[pc_pos].b[27];
 
   DES
   (

From d378aa7ab9dfa0a84db32148d5b43c3fdb8936b1 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 16:37:49 +0200
Subject: [PATCH 56/73] Show host memory requirement on startup

---
 include/types.h |  5 +++--
 src/backend.c   | 33 ++++++++++++++-------------------
 src/main.c      | 13 +++++++++++++
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/include/types.h b/include/types.h
index 76f8910a7..d4bcd581d 100644
--- a/include/types.h
+++ b/include/types.h
@@ -128,8 +128,9 @@ typedef enum event_identifier
   EVENT_MONITOR_NOINPUT_ABORT     = 0x00000088,
   EVENT_BACKEND_SESSION_POST      = 0x00000090,
   EVENT_BACKEND_SESSION_PRE       = 0x00000091,
-  EVENT_BACKEND_DEVICE_INIT_POST  = 0x00000092,
-  EVENT_BACKEND_DEVICE_INIT_PRE   = 0x00000093,
+  EVENT_BACKEND_SESSION_HOSTMEM   = 0x00000092,
+  EVENT_BACKEND_DEVICE_INIT_POST  = 0x00000093,
+  EVENT_BACKEND_DEVICE_INIT_PRE   = 0x00000094,
   EVENT_OUTERLOOP_FINISHED        = 0x000000a0,
   EVENT_OUTERLOOP_MAINSCREEN      = 0x000000a1,
   EVENT_OUTERLOOP_STARTING        = 0x000000a2,
diff --git a/src/backend.c b/src/backend.c
index ddbaf6456..e7e31ae11 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6944,6 +6944,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return 0;
 
+  u64 size_total_host_all = 0;
+
   u32 hardware_power_all = 0;
 
   int CU_rc;
@@ -10315,12 +10317,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // this value should represent a reasonable amount of memory a host system has per GPU.
     // note we're allocating 3 blocks of that size.
 
-    #define PWS_SPACE (1024 * 1024 * 1024)
+    const u64 PWS_SPACE = 4ull * 1024ull * 1024ull * 1024ull;
 
     // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
     // let's add some extra space just to be sure.
 
-    #define EXTRA_SPACE (64 * 1024 * 1024)
+    const u64 EXTRA_SPACE = 64ull * 1024ull * 1024ull;
 
     while (kernel_accel_max >= kernel_accel_min)
     {
@@ -10412,6 +10414,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if ((size_total + EXTRA_SPACE) > device_param->device_available_mem) memory_limit_hit = 1;
 
+      if (memory_limit_hit == 1)
+      {
+        kernel_accel_max--;
+
+        continue;
+      }
+
       const u64 size_total_host
         = size_pws_comp
         + size_pws_idx
@@ -10423,23 +10432,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         + size_pws_pre
         + size_pws_base;
 
-      if ((size_total_host + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
-
-      #if defined (__x86_x64__)
-      const u64 MAX_HOST_MEMORY = 16ull * 1024ull * 1024ull * 1024ull; // don't be too memory hungry
-      #else
-      const u64 MAX_HOST_MEMORY =  2ull * 1024ull * 1024ull * 1024ull; // windows 7 starter limits to 2gb instead of 4gb
-      #endif
-
-      // we assume all devices have the same specs here, which is wrong, it's a start
-      if ((size_total_host * backend_ctx->backend_devices_cnt) > MAX_HOST_MEMORY) memory_limit_hit = 1;
-
-      if (memory_limit_hit == 1)
-      {
-        kernel_accel_max--;
-
-        continue;
-      }
+      size_total_host_all += size_total_host + EXTRA_SPACE;
 
       break;
     }
@@ -10680,6 +10673,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
   backend_ctx->hardware_power_all = hardware_power_all;
 
+  EVENT_DATA (EVENT_BACKEND_SESSION_HOSTMEM, &size_total_host_all, sizeof (u64));
+
   return 0;
 }
 
diff --git a/src/main.c b/src/main.c
index 48c357c1a..c5a684f1e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -567,6 +567,18 @@ static void main_backend_session_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   event_log_info_nn (hashcat_ctx, "Initialized device kernels and memory...");
 }
 
+static void main_backend_session_hostmem (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+{
+  const user_options_t *user_options = hashcat_ctx->user_options;
+
+  if (user_options->quiet == true) return;
+
+  const u64 *hostmem = (const u64 *) buf;
+
+  event_log_info (hashcat_ctx, "Host memory required for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
+  event_log_info (hashcat_ctx, NULL);
+}
+
 static void main_backend_device_init_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
@@ -1024,6 +1036,7 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
     case EVENT_MONITOR_NOINPUT_ABORT:     main_monitor_noinput_abort     (hashcat_ctx, buf, len); break;
     case EVENT_BACKEND_SESSION_POST:      main_backend_session_post      (hashcat_ctx, buf, len); break;
     case EVENT_BACKEND_SESSION_PRE:       main_backend_session_pre       (hashcat_ctx, buf, len); break;
+    case EVENT_BACKEND_SESSION_HOSTMEM:   main_backend_session_hostmem   (hashcat_ctx, buf, len); break;
     case EVENT_BACKEND_DEVICE_INIT_POST:  main_backend_device_init_post  (hashcat_ctx, buf, len); break;
     case EVENT_BACKEND_DEVICE_INIT_PRE:   main_backend_device_init_pre   (hashcat_ctx, buf, len); break;
     case EVENT_OUTERLOOP_FINISHED:        main_outerloop_finished        (hashcat_ctx, buf, len); break;

From ba43223c272bf314c1d14719f24d4c88491d01c0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 17:26:22 +0200
Subject: [PATCH 57/73] Add missing entry in changes.txt

---
 docs/changes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changes.txt b/docs/changes.txt
index 2604b1f45..b81e1ae84 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -60,6 +60,7 @@
 - OpenCL Runtime: Workaround JiT compiler error on AMDGPU driver compiling WPA-EAPOL-PBKDF2 OpenCL kernel
 - OpenCL Runtime: Workaround JiT compiler error on ROCM 2.3 driver if the 'inline' keyword is used in function declaration
 - OpenCL Runtime: Workaround memory allocation error on AMD driver on Windows leading to CL_MEM_OBJECT_ALLOCATION_FAILURE
+- Startup Screen: Provide an estimate of host memory requirements for the requested attack
 - Tuning Database: Updated hashcat.hctune with new models and refreshed vector width values
 - WPA/WPA2 cracking: In the potfile, replace password with PMK in order to detect already cracked networks across all WPA modes
 

From b0f2fea883a8be971aa88f889223bd8b55b96cd0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 17:26:50 +0200
Subject: [PATCH 58/73] Update hashcat.hctune for CUDA instead of OpenCL on
 NVidia GPU

---
 hashcat.hctune | 42 ++++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/hashcat.hctune b/hashcat.hctune
index 3beea7ae3..caf16bfd9 100644
--- a/hashcat.hctune
+++ b/hashcat.hctune
@@ -312,29 +312,27 @@ ALIAS_nv_real_simd                              3       11900   2       A
 ALIAS_nv_real_simd                              3       13300   4       A       A
 ALIAS_nv_real_simd                              3       18700   8       A       A
 
-ALIAS_nv_sm50_or_higher                         3       0       4       A       A
-ALIAS_nv_sm50_or_higher                         3       10      4       A       A
-ALIAS_nv_sm50_or_higher                         3       11      4       A       A
-ALIAS_nv_sm50_or_higher                         3       12      4       A       A
-ALIAS_nv_sm50_or_higher                         3       20      4       A       A
-ALIAS_nv_sm50_or_higher                         3       21      4       A       A
-ALIAS_nv_sm50_or_higher                         3       22      4       A       A
-ALIAS_nv_sm50_or_higher                         3       23      4       A       A
-ALIAS_nv_sm50_or_higher                         3       30      4       A       A
-ALIAS_nv_sm50_or_higher                         3       40      4       A       A
+ALIAS_nv_sm50_or_higher                         3       0       8       A       A
+ALIAS_nv_sm50_or_higher                         3       10      8       A       A
+ALIAS_nv_sm50_or_higher                         3       11      8       A       A
+ALIAS_nv_sm50_or_higher                         3       12      8       A       A
+ALIAS_nv_sm50_or_higher                         3       20      8       A       A
+ALIAS_nv_sm50_or_higher                         3       21      8       A       A
+ALIAS_nv_sm50_or_higher                         3       22      8       A       A
+ALIAS_nv_sm50_or_higher                         3       23      8       A       A
+ALIAS_nv_sm50_or_higher                         3       30      8       A       A
+ALIAS_nv_sm50_or_higher                         3       40      8       A       A
 ALIAS_nv_sm50_or_higher                         3       200     8       A       A
-ALIAS_nv_sm50_or_higher                         3       900     4       A       A
-ALIAS_nv_sm50_or_higher                         3       1000    4       A       A
-ALIAS_nv_sm50_or_higher                         3       1100    2       A       A
-ALIAS_nv_sm50_or_higher                         3       2400    4       A       A
-ALIAS_nv_sm50_or_higher                         3       2410    4       A       A
-ALIAS_nv_sm50_or_higher                         3       3800    4       A       A
-ALIAS_nv_sm50_or_higher                         3       4800    4       A       A
-ALIAS_nv_sm50_or_higher                         3       5500    4       A       A
-ALIAS_nv_sm50_or_higher                         3       7300    2       A       A
-ALIAS_nv_sm50_or_higher                         3       8000    2       A       A
-ALIAS_nv_sm50_or_higher                         3       9900    4       A       A
-ALIAS_nv_sm50_or_higher                         3       16400   4       A       A
+ALIAS_nv_sm50_or_higher                         3       900     8       A       A
+ALIAS_nv_sm50_or_higher                         3       1000    8       A       A
+ALIAS_nv_sm50_or_higher                         3       1100    8       A       A
+ALIAS_nv_sm50_or_higher                         3       2400    8       A       A
+ALIAS_nv_sm50_or_higher                         3       2410    8       A       A
+ALIAS_nv_sm50_or_higher                         3       3800    8       A       A
+ALIAS_nv_sm50_or_higher                         3       4800    8       A       A
+ALIAS_nv_sm50_or_higher                         3       5500    8       A       A
+ALIAS_nv_sm50_or_higher                         3       9900    8       A       A
+ALIAS_nv_sm50_or_higher                         3       16400   8       A       A
 ALIAS_nv_sm50_or_higher                         3       18700   8       A       A
 
 ##

From d59474fded17b1c4eacda8dd8f0adbeb6a3a0a97 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 17:27:15 +0200
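Subject: [PATCH 59/73] Experimentally unlock full thread count on NVidia

Set every kernel_preferred_wgs_multiple* field in backend_session_begin()
to device_maxworkgroup_size instead of cuda_warp_size.

Also reduce PWS_SPACE from 4 GiB to 1 GiB.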
---
 src/backend.c | 58 +++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index e7e31ae11..726594fe6 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -8782,7 +8782,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8802,7 +8802,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8822,7 +8822,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8842,7 +8842,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple4 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8867,7 +8867,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8887,7 +8887,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8907,7 +8907,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8927,7 +8927,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
+            device_param->kernel_preferred_wgs_multiple4 = device_param->device_maxworkgroup_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8956,7 +8956,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
               if (CL_rc == -1) return -1;
 
-              device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size;
+              device_param->kernel_preferred_wgs_multiple_tm = device_param->device_maxworkgroup_size;
 
               if (CL_rc == -1) return -1;
             }
@@ -8981,7 +8981,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
+        device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9001,7 +9001,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
+        device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9021,7 +9021,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
+        device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9043,7 +9043,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple12 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9066,7 +9066,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple23 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9089,7 +9089,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_init2 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9112,7 +9112,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9135,7 +9135,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9158,7 +9158,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9181,7 +9181,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9204,7 +9204,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9224,7 +9224,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size;
+      device_param->kernel_preferred_wgs_multiple_memset = device_param->device_maxworkgroup_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9246,7 +9246,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size;
+      device_param->kernel_preferred_wgs_multiple_atinit = device_param->device_maxworkgroup_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9267,7 +9267,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size;
+      device_param->kernel_preferred_wgs_multiple_decompress = device_param->device_maxworkgroup_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9299,7 +9299,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
 
@@ -9317,7 +9317,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
 
@@ -9341,7 +9341,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9359,7 +9359,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9388,7 +9388,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size;
+          device_param->kernel_preferred_wgs_multiple_amp = device_param->device_maxworkgroup_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -10317,7 +10317,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // this value should represent a reasonable amount of memory a host system has per GPU.
     // note we're allocating 3 blocks of that size.
 
-    const u64 PWS_SPACE = 4ull * 1024ull * 1024ull * 1024ull;
+    const u64 PWS_SPACE = 1024ull * 1024ull * 1024ull;
 
     // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
     // let's add some extra space just to be sure.

From 2b0f657564290d1f7c94a90a3e10e1d220b886b7 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 10 May 2019 23:51:09 +0200
Subject: [PATCH 60/73] Limit some modules to 64 threads in order to get
 additional L1 cache

---
 docs/changes.txt           | 1 +
 src/modules/module_01800.c | 9 ++++++++-
 src/modules/module_05500.c | 9 ++++++++-
 src/modules/module_06221.c | 9 ++++++++-
 src/modules/module_06222.c | 9 ++++++++-
 src/modules/module_06223.c | 9 ++++++++-
 src/modules/module_07700.c | 9 ++++++++-
 src/modules/module_07900.c | 9 ++++++++-
 src/modules/module_08200.c | 9 ++++++++-
 src/modules/module_08500.c | 9 ++++++++-
 src/modules/module_11400.c | 9 ++++++++-
 src/modules/module_11500.c | 9 ++++++++-
 src/modules/module_11760.c | 9 ++++++++-
 src/modules/module_11860.c | 9 ++++++++-
 src/modules/module_12400.c | 9 ++++++++-
 src/modules/module_13711.c | 9 ++++++++-
 src/modules/module_13712.c | 9 ++++++++-
 src/modules/module_13713.c | 9 ++++++++-
 src/modules/module_13721.c | 9 ++++++++-
 src/modules/module_13722.c | 9 ++++++++-
 src/modules/module_13723.c | 9 ++++++++-
 src/modules/module_13731.c | 9 ++++++++-
 src/modules/module_13732.c | 9 ++++++++-
 src/modules/module_13733.c | 9 ++++++++-
 src/modules/module_13741.c | 9 ++++++++-
 src/modules/module_13742.c | 9 ++++++++-
 src/modules/module_13743.c | 9 ++++++++-
 src/modules/module_13751.c | 9 ++++++++-
 src/modules/module_13752.c | 9 ++++++++-
 src/modules/module_13753.c | 9 ++++++++-
 src/modules/module_13761.c | 9 ++++++++-
 src/modules/module_13762.c | 9 ++++++++-
 src/modules/module_13763.c | 9 ++++++++-
 src/modules/module_13771.c | 9 ++++++++-
 src/modules/module_13772.c | 9 ++++++++-
 src/modules/module_13773.c | 9 ++++++++-
 src/modules/module_15900.c | 9 ++++++++-
 src/modules/module_16600.c | 9 ++++++++-
 src/modules/module_19000.c | 9 ++++++++-
 src/modules/module_19100.c | 9 ++++++++-
 src/modules/module_19200.c | 9 ++++++++-
 src/modules/module_19300.c | 9 ++++++++-
 src/modules/module_20011.c | 9 ++++++++-
 src/modules/module_20012.c | 9 ++++++++-
 src/modules/module_20013.c | 9 ++++++++-
 45 files changed, 353 insertions(+), 44 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index b81e1ae84..384f3bf21 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -56,6 +56,7 @@
 - OpenCL Runtime: Disable OpenCL kernel cache on Apple for Intel CPU (throws CL_BUILD_PROGRAM_FAILURE for no reason)
 - OpenCL Runtime: Improve ROCM detection and make sure to not confuse with recent AMDGPU drivers
 - OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults
+- OpenCL Runtime: Unlocked maximum thread count
 - OpenCL Runtime: Update unstable mode warnings for Apple and AMDGPU drivers
 - OpenCL Runtime: Workaround JiT compiler error on AMDGPU driver compiling WPA-EAPOL-PBKDF2 OpenCL kernel
 - OpenCL Runtime: Workaround JiT compiler error on ROCM 2.3 driver if the 'inline' keyword is used in function declaration
diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c
index 2dc77c887..793e17a72 100644
--- a/src/modules/module_01800.c
+++ b/src/modules/module_01800.c
@@ -431,6 +431,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -562,7 +569,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_05500.c b/src/modules/module_05500.c
index 4f001d41c..6aa5968c1 100644
--- a/src/modules/module_05500.c
+++ b/src/modules/module_05500.c
@@ -86,6 +86,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -428,7 +435,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06221.c b/src/modules/module_06221.c
index 3e0125822..7decf6170 100644
--- a/src/modules/module_06221.c
+++ b/src/modules/module_06221.c
@@ -109,6 +109,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -270,7 +277,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06222.c b/src/modules/module_06222.c
index 6d8c19d19..1f45e1082 100644
--- a/src/modules/module_06222.c
+++ b/src/modules/module_06222.c
@@ -109,6 +109,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -270,7 +277,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06223.c b/src/modules/module_06223.c
index 4f317a345..bf00bbbc4 100644
--- a/src/modules/module_06223.c
+++ b/src/modules/module_06223.c
@@ -109,6 +109,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -270,7 +277,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_07700.c b/src/modules/module_07700.c
index df9089e26..7f213e0d7 100644
--- a/src/modules/module_07700.c
+++ b/src/modules/module_07700.c
@@ -51,6 +51,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -174,7 +181,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c
index 61b1be24a..a0521a15e 100644
--- a/src/modules/module_07900.c
+++ b/src/modules/module_07900.c
@@ -66,6 +66,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 static void drupal7_decode (u8 digest[64], const u8 buf[44])
 {
   int l;
@@ -464,7 +471,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_08200.c b/src/modules/module_08200.c
index 9a8dc5772..255f6d37e 100644
--- a/src/modules/module_08200.c
+++ b/src/modules/module_08200.c
@@ -82,6 +82,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -275,7 +282,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_08500.c b/src/modules/module_08500.c
index bdd563b51..58fa0184a 100644
--- a/src/modules/module_08500.c
+++ b/src/modules/module_08500.c
@@ -51,6 +51,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -217,7 +224,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11400.c b/src/modules/module_11400.c
index f7363e2b2..681840b90 100644
--- a/src/modules/module_11400.c
+++ b/src/modules/module_11400.c
@@ -78,6 +78,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -467,7 +474,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11500.c b/src/modules/module_11500.c
index 6d4b3453f..82aa44375 100644
--- a/src/modules/module_11500.c
+++ b/src/modules/module_11500.c
@@ -41,6 +41,13 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -148,7 +155,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11760.c b/src/modules/module_11760.c
index 0fbcc4eeb..65358b8a7 100644
--- a/src/modules/module_11760.c
+++ b/src/modules/module_11760.c
@@ -52,6 +52,13 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -189,7 +196,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11860.c b/src/modules/module_11860.c
index 92e2d632d..6f99de148 100644
--- a/src/modules/module_11860.c
+++ b/src/modules/module_11860.c
@@ -52,6 +52,13 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -213,7 +220,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_12400.c b/src/modules/module_12400.c
index c024fd835..8de967dfe 100644
--- a/src/modules/module_12400.c
+++ b/src/modules/module_12400.c
@@ -66,6 +66,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -234,7 +241,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13711.c b/src/modules/module_13711.c
index 1948988ed..4825fce79 100644
--- a/src/modules/module_13711.c
+++ b/src/modules/module_13711.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13712.c b/src/modules/module_13712.c
index 4ac231ce5..c22f15d0b 100644
--- a/src/modules/module_13712.c
+++ b/src/modules/module_13712.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13713.c b/src/modules/module_13713.c
index 91d69a767..5652d6eba 100644
--- a/src/modules/module_13713.c
+++ b/src/modules/module_13713.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13721.c b/src/modules/module_13721.c
index 99fcc128d..ae0d3b48c 100644
--- a/src/modules/module_13721.c
+++ b/src/modules/module_13721.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13722.c b/src/modules/module_13722.c
index 17dd5c082..201735ad5 100644
--- a/src/modules/module_13722.c
+++ b/src/modules/module_13722.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13723.c b/src/modules/module_13723.c
index 25bdbe5eb..560707f4f 100644
--- a/src/modules/module_13723.c
+++ b/src/modules/module_13723.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13731.c b/src/modules/module_13731.c
index a0062491b..e1fff1cc3 100644
--- a/src/modules/module_13731.c
+++ b/src/modules/module_13731.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13732.c b/src/modules/module_13732.c
index bb6c45321..89b5970d9 100644
--- a/src/modules/module_13732.c
+++ b/src/modules/module_13732.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13733.c b/src/modules/module_13733.c
index b5be933eb..9d69d0049 100644
--- a/src/modules/module_13733.c
+++ b/src/modules/module_13733.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13741.c b/src/modules/module_13741.c
index bec68bcf5..3cbccc940 100644
--- a/src/modules/module_13741.c
+++ b/src/modules/module_13741.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13742.c b/src/modules/module_13742.c
index 27d585271..950326d40 100644
--- a/src/modules/module_13742.c
+++ b/src/modules/module_13742.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13743.c b/src/modules/module_13743.c
index 77f9f2d69..d317723dc 100644
--- a/src/modules/module_13743.c
+++ b/src/modules/module_13743.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c
index 007ef20c6..84451ab96 100644
--- a/src/modules/module_13751.c
+++ b/src/modules/module_13751.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c
index 70395556c..f26d91142 100644
--- a/src/modules/module_13752.c
+++ b/src/modules/module_13752.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c
index bac3bb312..7ada76eff 100644
--- a/src/modules/module_13753.c
+++ b/src/modules/module_13753.c
@@ -136,6 +136,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -313,7 +320,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c
index c7845c394..8263b33e2 100644
--- a/src/modules/module_13761.c
+++ b/src/modules/module_13761.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c
index 2cca29da7..c742e2639 100644
--- a/src/modules/module_13762.c
+++ b/src/modules/module_13762.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c
index 5f3b4a37e..3b8ab3ba5 100644
--- a/src/modules/module_13763.c
+++ b/src/modules/module_13763.c
@@ -137,6 +137,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -314,7 +321,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13771.c b/src/modules/module_13771.c
index 9339bc87d..573f4332f 100644
--- a/src/modules/module_13771.c
+++ b/src/modules/module_13771.c
@@ -140,6 +140,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // fixed cap, independent of the (unused) arguments; 64 was chosen for performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -317,7 +324,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13772.c b/src/modules/module_13772.c
index b72a010bf..070e37971 100644
--- a/src/modules/module_13772.c
+++ b/src/modules/module_13772.c
@@ -140,6 +140,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -317,7 +324,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13773.c b/src/modules/module_13773.c
index 6820de1bb..c49a6d09c 100644
--- a/src/modules/module_13773.c
+++ b/src/modules/module_13773.c
@@ -140,6 +140,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -317,7 +324,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_15900.c b/src/modules/module_15900.c
index 2b19f5213..e5eb8625b 100644
--- a/src/modules/module_15900.c
+++ b/src/modules/module_15900.c
@@ -97,6 +97,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed
@@ -426,7 +433,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_16600.c b/src/modules/module_16600.c
index 909f6c1e9..2db263244 100644
--- a/src/modules/module_16600.c
+++ b/src/modules/module_16600.c
@@ -60,6 +60,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -226,7 +233,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19000.c b/src/modules/module_19000.c
index 572ebe560..9243e6166 100644
--- a/src/modules/module_19000.c
+++ b/src/modules/module_19000.c
@@ -57,6 +57,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -221,7 +228,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19100.c b/src/modules/module_19100.c
index 44f6275b4..7fe5d2181 100644
--- a/src/modules/module_19100.c
+++ b/src/modules/module_19100.c
@@ -57,6 +57,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -246,7 +253,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19200.c b/src/modules/module_19200.c
index 77a1c80bf..a70ecc299 100644
--- a/src/modules/module_19200.c
+++ b/src/modules/module_19200.c
@@ -59,6 +59,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -248,7 +255,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19300.c b/src/modules/module_19300.c
index 4b1da6300..27a63b4da 100644
--- a/src/modules/module_19300.c
+++ b/src/modules/module_19300.c
@@ -59,6 +59,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -237,7 +244,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_20011.c b/src/modules/module_20011.c
index 1bcad8cb6..45b75141a 100644
--- a/src/modules/module_20011.c
+++ b/src/modules/module_20011.c
@@ -84,6 +84,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -228,7 +235,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;
diff --git a/src/modules/module_20012.c b/src/modules/module_20012.c
index 0dd86e76c..302c1f23b 100644
--- a/src/modules/module_20012.c
+++ b/src/modules/module_20012.c
@@ -84,6 +84,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -228,7 +235,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;
diff --git a/src/modules/module_20013.c b/src/modules/module_20013.c
index 4a7fb4bc8..535e0922b 100644
--- a/src/modules/module_20013.c
+++ b/src/modules/module_20013.c
@@ -84,6 +84,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 64; // performance
+
+  return kernel_threads_max;
+}
+
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -228,7 +235,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;

From 7832c544520fb24cf7c9da5e3566864b85e16e76 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 09:32:16 +0200
Subject: [PATCH 61/73] Fix constant memory use of bfs_buf

---
 OpenCL/inc_common.h    | 38 +++++++++++++++++++-------------------
 OpenCL/inc_platform.cl |  3 +++
 OpenCL/inc_simd.cl     | 36 ++++++++++++++++++------------------
 OpenCL/inc_simd.h      |  6 +++---
 4 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index e323d0e0a..58a6bbeb5 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -110,27 +110,27 @@
  */
 
 #ifdef IS_CUDA
-#define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       void, void, void)
-#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *g_words_buf_s, void, void, void)
-#define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       void, void, e)
+#define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, void)
+#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bs_word_t *g_words_buf_s, void, void, void)
+#define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, e)
+#define KERN_ATTR_RULES()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, void)
+#define KERN_ATTR_RULES_ESALT(e)  KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     void, void, e)
+#define KERN_ATTR_TMPS(t)         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     t,    void, void)
+#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     t,    void, e)
+#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *g_bfs_buf,     t,    h,    void)
+#define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const u32x      *g_words_buf_r, void, void, void)
+#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const u32x      *g_words_buf_r, void, void, e)
+#else
+#define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bf_t      *bfs_buf,       void, void, void)
+#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *words_buf_s,   void, void, void)
+#define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bf_t      *bfs_buf,       void, void, e)
 #define KERN_ATTR_RULES()         KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,       void, void, void)
 #define KERN_ATTR_RULES_ESALT(e)  KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,       void, void, e)
-#define KERN_ATTR_TMPS(t)         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    void, void)
-#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    void, e)
-#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,       t,    h,    void)
-#define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *g_words_buf_r, void, void, void)
-#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *g_words_buf_r, void, void, e)
-#else
-#define KERN_ATTR_BASIC()         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     void, void, void)
-#define KERN_ATTR_BITSLICE()      KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bs_word_t *words_buf_s, void, void, void)
-#define KERN_ATTR_ESALT(e)        KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     void, void, e)
-#define KERN_ATTR_RULES()         KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,     void, void, void)
-#define KERN_ATTR_RULES_ESALT(e)  KERN_ATTR (CONSTANT_AS, GLOBAL_AS   const bf_t      *bfs_buf,     void, void, e)
-#define KERN_ATTR_TMPS(t)         KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     t,    void, void)
-#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     t,    void, e)
-#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   GLOBAL_AS   const bf_t      *bfs_buf,     t,    h,    void)
-#define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r, void, void, void)
-#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r, void, void, e)
+#define KERN_ATTR_TMPS(t)         KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bf_t      *bfs_buf,       t,    void, void)
+#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bf_t      *bfs_buf,       t,    void, e)
+#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const bf_t      *bfs_buf,       t,    h,    void)
+#define KERN_ATTR_VECTOR()        KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r,   void, void, void)
+#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS,   CONSTANT_AS const u32x      *words_buf_r,   void, void, e)
 #endif
 
 // union based packing
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 3606804b4..16761cc27 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -18,15 +18,18 @@
 CONSTANT_VK u32 generic_constant[8192]; // 32k
 
 #if   ATTACK_KERN == 0
+#define bfs_buf     g_bfs_buf
 #define rules_buf   ((const kernel_rule_t *) generic_constant)
 #define words_buf_s g_words_buf_s
 #define words_buf_r g_words_buf_r
 #elif ATTACK_KERN == 1
+#define bfs_buf     g_bfs_buf
 #define rules_buf   g_rules_buf
 #define words_buf_s g_words_buf_s
 #define words_buf_r g_words_buf_r
 #elif ATTACK_KERN == 3
 #define rules_buf   g_rules_buf
+#define bfs_buf     ((const bf_t *)      generic_constant)
 #define words_buf_s ((const bs_word_t *) generic_constant)
 #define words_buf_r ((const u32x *)      generic_constant)
 #endif
diff --git a/OpenCL/inc_simd.cl b/OpenCL/inc_simd.cl
index 329270140..7bedf35cb 100644
--- a/OpenCL/inc_simd.cl
+++ b/OpenCL/inc_simd.cl
@@ -11,18 +11,18 @@
 
 // attack-mode 0
 
-DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
+DECLSPEC u32x ix_create_bft (CONSTANT_AS const bf_t *arr, const u32 il_pos)
 {
   #if   VECT_SIZE == 1
-  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i);
+  const u32x ix = make_u32x (arr[il_pos + 0].i);
   #elif VECT_SIZE == 2
-  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
+  const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i);
   #elif VECT_SIZE == 4
-  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
+  const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i);
   #elif VECT_SIZE == 8
-  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
+  const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i);
   #elif VECT_SIZE == 16
-  const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
+  const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i, arr[il_pos + 8].i, arr[il_pos + 9].i, arr[il_pos + 10].i, arr[il_pos + 11].i, arr[il_pos + 12].i, arr[il_pos + 13].i, arr[il_pos + 14].i, arr[il_pos + 15].i);
   #endif
 
   return ix;
@@ -30,35 +30,35 @@ DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
 
 // attack-mode 1
 
-DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos)
+DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos)
 {
   #if   VECT_SIZE == 1
-  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len);
+  const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len);
   #elif VECT_SIZE == 2
-  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
+  const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len);
   #elif VECT_SIZE == 4
-  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
+  const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len);
   #elif VECT_SIZE == 8
-  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
+  const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len);
   #elif VECT_SIZE == 16
-  const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
+  const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len, arr[il_pos + 8].pw_len, arr[il_pos + 9].pw_len, arr[il_pos + 10].pw_len, arr[il_pos + 11].pw_len, arr[il_pos + 12].pw_len, arr[il_pos + 13].pw_len, arr[il_pos + 14].pw_len, arr[il_pos + 15].pw_len);
   #endif
 
   return pw_lenx;
 }
 
-DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx)
+DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos, const int idx)
 {
   #if   VECT_SIZE == 1
-  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx]);
+  const u32x ix = make_u32x (arr[il_pos + 0].i[idx]);
   #elif VECT_SIZE == 2
-  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
+  const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx]);
   #elif VECT_SIZE == 4
-  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
+  const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx]);
   #elif VECT_SIZE == 8
-  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
+  const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx]);
   #elif VECT_SIZE == 16
-  const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
+  const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx], arr[il_pos + 8].i[idx], arr[il_pos + 9].i[idx], arr[il_pos + 10].i[idx], arr[il_pos + 11].i[idx], arr[il_pos + 12].i[idx], arr[il_pos + 13].i[idx], arr[il_pos + 14].i[idx], arr[il_pos + 15].i[idx]);
   #endif
 
   return ix;
diff --git a/OpenCL/inc_simd.h b/OpenCL/inc_simd.h
index 4d0d359cf..89ba41dab 100644
--- a/OpenCL/inc_simd.h
+++ b/OpenCL/inc_simd.h
@@ -1133,8 +1133,8 @@
 #define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 16) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 16) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 16) + 3].var[(idx)] ^= val.s3; (arr)[((gid) * 16) + 4].var[(idx)] ^= val.s4; (arr)[((gid) * 16) + 5].var[(idx)] ^= val.s5; (arr)[((gid) * 16) + 6].var[(idx)] ^= val.s6; (arr)[((gid) * 16) + 7].var[(idx)] ^= val.s7; (arr)[((gid) * 16) + 8].var[(idx)] ^= val.s8; (arr)[((gid) * 16) + 9].var[(idx)] ^= val.s9; (arr)[((gid) * 16) + 10].var[(idx)] ^= val.sa; (arr)[((gid) * 16) + 11].var[(idx)] ^= val.sb; (arr)[((gid) * 16) + 12].var[(idx)] ^= val.sc; (arr)[((gid) * 16) + 13].var[(idx)] ^= val.sd; (arr)[((gid) * 16) + 14].var[(idx)] ^= val.se; (arr)[((gid) * 16) + 15].var[(idx)] ^= val.sf;
 #endif
 
-DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos);
-DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos);
-DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx);
+DECLSPEC u32x ix_create_bft       (CONSTANT_AS const bf_t *arr, const u32 il_pos);
+DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS   const pw_t *arr, const u32 il_pos);
+DECLSPEC u32x ix_create_combt     (GLOBAL_AS   const pw_t *arr, const u32 il_pos, const int idx);
 
 #endif

From a6bc1d3cc03671698cccce89e61d09d20494c478 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 11:58:18 +0200
Subject: [PATCH 62/73] Experimental kernel-thread autotuner

---
 src/autotune.c             | 86 ++++++++++++++++++++++++++++++++++++++
 src/backend.c              | 77 +++++++++++++++++-----------------
 src/modules/module_01800.c |  9 +---
 src/modules/module_05500.c |  9 +---
 src/modules/module_06221.c |  9 +---
 src/modules/module_06222.c |  9 +---
 src/modules/module_06223.c |  9 +---
 src/modules/module_07700.c |  9 +---
 src/modules/module_07900.c |  9 +---
 src/modules/module_08200.c |  9 +---
 src/modules/module_08500.c |  9 +---
 src/modules/module_11400.c |  9 +---
 src/modules/module_11500.c |  9 +---
 src/modules/module_11760.c |  9 +---
 src/modules/module_11860.c |  9 +---
 src/modules/module_12400.c |  9 +---
 src/modules/module_13711.c |  9 +---
 src/modules/module_13712.c |  9 +---
 src/modules/module_13713.c |  9 +---
 src/modules/module_13721.c |  9 +---
 src/modules/module_13722.c |  9 +---
 src/modules/module_13723.c |  9 +---
 src/modules/module_13731.c |  9 +---
 src/modules/module_13732.c |  9 +---
 src/modules/module_13733.c |  9 +---
 src/modules/module_13741.c |  9 +---
 src/modules/module_13742.c |  9 +---
 src/modules/module_13743.c |  9 +---
 src/modules/module_13751.c |  9 +---
 src/modules/module_13752.c |  9 +---
 src/modules/module_13753.c |  9 +---
 src/modules/module_13761.c |  9 +---
 src/modules/module_13762.c |  9 +---
 src/modules/module_13763.c |  9 +---
 src/modules/module_13771.c |  9 +---
 src/modules/module_13772.c |  9 +---
 src/modules/module_13773.c |  9 +---
 src/modules/module_15900.c |  9 +---
 src/modules/module_16600.c |  9 +---
 src/modules/module_19000.c |  9 +---
 src/modules/module_19100.c |  9 +---
 src/modules/module_19200.c |  9 +---
 src/modules/module_19300.c |  9 +---
 src/modules/module_20011.c |  9 +---
 src/modules/module_20012.c |  9 +---
 src/modules/module_20013.c |  9 +---
 46 files changed, 169 insertions(+), 390 deletions(-)

diff --git a/src/autotune.c b/src/autotune.c
index 90f067d8b..43b5b46bb 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -47,6 +47,53 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   return exec_msec_prev;
 }
 
+static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
+{
+  hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+  // one trial launch with kernel_threads temporarily set to the selected kernel's kernel_preferred_wgs_multiple* value
+  device_param->kernel_params_buf32[28] = 0;
+  device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
+  device_param->kernel_params_buf32[30] = kernel_loops; // because there are two variables for inner iterations, one for slow and one for fast hashes
+
+  const u32 kernel_power_try = device_param->hardware_power * kernel_accel; // handed to run_kernel () below
+
+  const u32 kernel_threads_sav = device_param->kernel_threads; // saved, restored after the trial launch
+
+  const double spin_damp_sav = device_param->spin_damp; // saved, restored after the trial launch
+
+  device_param->spin_damp = 0; // NOTE(review): presumably keeps spin damping from skewing the measurement - confirm in run_kernel ()
+
+  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  {
+    if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+    {
+      device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple1; // preferred value that belongs to KERN_RUN_1
+
+      run_kernel (hashcat_ctx, device_param, KERN_RUN_1, kernel_power_try, true, 0);
+    }
+    else
+    {
+      device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple4; // preferred value that belongs to KERN_RUN_4
+
+      run_kernel (hashcat_ctx, device_param, KERN_RUN_4, kernel_power_try, true, 0);
+    }
+  }
+  else
+  {
+    device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple2; // preferred value that belongs to KERN_RUN_2
+
+    run_kernel (hashcat_ctx, device_param, KERN_RUN_2, kernel_power_try, true, 0);
+  }
+
+  device_param->kernel_threads = kernel_threads_sav; // undo the temporary override, callers see the old thread count
+
+  device_param->spin_damp = spin_damp_sav; // undo the temporary override
+
+  const double exec_msec_prev = get_avg_exec_time (device_param, 1); // NOTE(review): presumably the runtime of the launch above in msec - confirm
+
+  return exec_msec_prev;
+}
+
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
   const hashconfig_t    *hashconfig   = hashcat_ctx->hashconfig;
@@ -249,6 +296,45 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     kernel_accel *= exec_accel_min;
   }
 
+  // finding the best thread count is easier:
+  // it's either the preferred or the maximum thread count
+
+  const u32 kernel_threads_min = device_param->kernel_threads_min;
+  const u32 kernel_threads_max = device_param->kernel_threads_max;
+
+  if (kernel_threads_min < kernel_threads_max)
+  {
+    const double exec_msec_max = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+
+    u32 preferred_threads = 0;
+
+    if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+      {
+        preferred_threads = device_param->kernel_preferred_wgs_multiple1;
+      }
+      else
+      {
+        preferred_threads = device_param->kernel_preferred_wgs_multiple4;
+      }
+    }
+    else
+    {
+      preferred_threads = device_param->kernel_preferred_wgs_multiple2;
+    }
+
+    if ((preferred_threads >= kernel_threads_min) && (preferred_threads <= kernel_threads_max))
+    {
+      const double exec_msec_preferred = try_run_preferred (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+
+      if (exec_msec_preferred < exec_msec_max)
+      {
+        device_param->kernel_threads = preferred_threads;
+      }
+    }
+  }
+
   if (device_param->is_cuda == true)
   {
     // reset them fake words
diff --git a/src/backend.c b/src/backend.c
index 726594fe6..fdeb12512 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6808,10 +6808,8 @@ static int get_opencl_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hc_devi
   return 0;
 }
 
-static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
+static u32 get_kernel_threads (const hc_device_param_t *device_param)
 {
-  const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
-
   // a module can force a fixed value
 
   u32 kernel_threads_min = device_param->kernel_threads_min;
@@ -6841,7 +6839,9 @@ static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param
 
   // complicated kernel tend to confuse OpenCL runtime suggestions for maximum thread size
   // let's workaround that by sticking to their device specific preferred thread size
+  // this section was replaced by autotune
 
+  /*
   if (hashconfig->opts_type & OPTS_TYPE_PREFERED_THREAD)
   {
     if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -6926,6 +6926,7 @@ static u32 get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param
       }
     }
   }
+  */
 
   return kernel_threads;
 }
@@ -8782,7 +8783,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8802,7 +8803,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8822,7 +8823,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8842,7 +8843,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple4 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8867,7 +8868,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8887,7 +8888,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
 
@@ -8907,7 +8908,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8927,7 +8928,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
             if (CL_rc == -1) return -1;
 
-            device_param->kernel_preferred_wgs_multiple4 = device_param->device_maxworkgroup_size;
+            device_param->kernel_preferred_wgs_multiple4 = device_param->cuda_warp_size;
 
             if (CL_rc == -1) return -1;
           }
@@ -8956,7 +8957,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
               if (CL_rc == -1) return -1;
 
-              device_param->kernel_preferred_wgs_multiple_tm = device_param->device_maxworkgroup_size;
+              device_param->kernel_preferred_wgs_multiple_tm = device_param->cuda_warp_size;
 
               if (CL_rc == -1) return -1;
             }
@@ -8981,7 +8982,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple1 = device_param->device_maxworkgroup_size;
+        device_param->kernel_preferred_wgs_multiple1 = device_param->cuda_warp_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9001,7 +9002,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple2 = device_param->device_maxworkgroup_size;
+        device_param->kernel_preferred_wgs_multiple2 = device_param->cuda_warp_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9021,7 +9022,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         if (CL_rc == -1) return -1;
 
-        device_param->kernel_preferred_wgs_multiple3 = device_param->device_maxworkgroup_size;
+        device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
 
         if (CL_rc == -1) return -1;
 
@@ -9043,7 +9044,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple12 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple12 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9066,7 +9067,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple23 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple23 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9089,7 +9090,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_init2 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9112,7 +9113,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_loop2 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9135,7 +9136,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_aux1 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9158,7 +9159,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_aux2 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9181,7 +9182,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_aux3 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9204,7 +9205,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_aux4 = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9224,7 +9225,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_memset = device_param->device_maxworkgroup_size;
+      device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9246,7 +9247,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_atinit = device_param->device_maxworkgroup_size;
+      device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9267,7 +9268,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if (CL_rc == -1) return -1;
 
-      device_param->kernel_preferred_wgs_multiple_decompress = device_param->device_maxworkgroup_size;
+      device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size;
 
       if (CL_rc == -1) return -1;
 
@@ -9299,7 +9300,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_mp_l = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
 
@@ -9317,7 +9318,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
 
@@ -9341,7 +9342,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9359,7 +9360,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_mp = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_mp = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -9388,7 +9389,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (CL_rc == -1) return -1;
 
-          device_param->kernel_preferred_wgs_multiple_amp = device_param->device_maxworkgroup_size;
+          device_param->kernel_preferred_wgs_multiple_amp = device_param->cuda_warp_size;
 
           if (CL_rc == -1) return -1;
         }
@@ -10270,12 +10271,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    /**
-     * now everything that depends on threads and accel, basically dynamic workload
-     */
-
-    u32 kernel_threads = get_kernel_threads (hashcat_ctx, device_param);
-
     // this is required because inside the kernels there is this:
     // __local pw_t s_pws[64];
 
@@ -10287,10 +10282,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
       else
       {
-        kernel_threads = MIN (kernel_threads, 64);
+        device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
       }
     }
 
+    /**
+     * now everything that depends on threads and accel, basically dynamic workload
+     */
+
+    const u32 kernel_threads = get_kernel_threads (device_param);
+
     device_param->kernel_threads = kernel_threads;
 
     device_param->hardware_power = device_processors * kernel_threads;
diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c
index 793e17a72..2dc77c887 100644
--- a/src/modules/module_01800.c
+++ b/src/modules/module_01800.c
@@ -431,13 +431,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -569,7 +562,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_05500.c b/src/modules/module_05500.c
index 6aa5968c1..4f001d41c 100644
--- a/src/modules/module_05500.c
+++ b/src/modules/module_05500.c
@@ -86,13 +86,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -435,7 +428,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06221.c b/src/modules/module_06221.c
index 7decf6170..3e0125822 100644
--- a/src/modules/module_06221.c
+++ b/src/modules/module_06221.c
@@ -109,13 +109,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -277,7 +270,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06222.c b/src/modules/module_06222.c
index 1f45e1082..6d8c19d19 100644
--- a/src/modules/module_06222.c
+++ b/src/modules/module_06222.c
@@ -109,13 +109,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -277,7 +270,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_06223.c b/src/modules/module_06223.c
index bf00bbbc4..4f317a345 100644
--- a/src/modules/module_06223.c
+++ b/src/modules/module_06223.c
@@ -109,13 +109,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -277,7 +270,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_07700.c b/src/modules/module_07700.c
index 7f213e0d7..df9089e26 100644
--- a/src/modules/module_07700.c
+++ b/src/modules/module_07700.c
@@ -51,13 +51,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -181,7 +174,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c
index a0521a15e..61b1be24a 100644
--- a/src/modules/module_07900.c
+++ b/src/modules/module_07900.c
@@ -66,13 +66,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 static void drupal7_decode (u8 digest[64], const u8 buf[44])
 {
   int l;
@@ -471,7 +464,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_08200.c b/src/modules/module_08200.c
index 255f6d37e..9a8dc5772 100644
--- a/src/modules/module_08200.c
+++ b/src/modules/module_08200.c
@@ -82,13 +82,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -282,7 +275,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_08500.c b/src/modules/module_08500.c
index 58fa0184a..bdd563b51 100644
--- a/src/modules/module_08500.c
+++ b/src/modules/module_08500.c
@@ -51,13 +51,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -224,7 +217,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11400.c b/src/modules/module_11400.c
index 681840b90..f7363e2b2 100644
--- a/src/modules/module_11400.c
+++ b/src/modules/module_11400.c
@@ -78,13 +78,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -474,7 +467,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11500.c b/src/modules/module_11500.c
index 82aa44375..6d4b3453f 100644
--- a/src/modules/module_11500.c
+++ b/src/modules/module_11500.c
@@ -41,13 +41,6 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -155,7 +148,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11760.c b/src/modules/module_11760.c
index 65358b8a7..0fbcc4eeb 100644
--- a/src/modules/module_11760.c
+++ b/src/modules/module_11760.c
@@ -52,13 +52,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -196,7 +189,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_11860.c b/src/modules/module_11860.c
index 6f99de148..92e2d632d 100644
--- a/src/modules/module_11860.c
+++ b/src/modules/module_11860.c
@@ -52,13 +52,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
   return false;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -220,7 +213,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_12400.c b/src/modules/module_12400.c
index 8de967dfe..c024fd835 100644
--- a/src/modules/module_12400.c
+++ b/src/modules/module_12400.c
@@ -66,13 +66,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -241,7 +234,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13711.c b/src/modules/module_13711.c
index 4825fce79..1948988ed 100644
--- a/src/modules/module_13711.c
+++ b/src/modules/module_13711.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13712.c b/src/modules/module_13712.c
index c22f15d0b..4ac231ce5 100644
--- a/src/modules/module_13712.c
+++ b/src/modules/module_13712.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13713.c b/src/modules/module_13713.c
index 5652d6eba..91d69a767 100644
--- a/src/modules/module_13713.c
+++ b/src/modules/module_13713.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13721.c b/src/modules/module_13721.c
index ae0d3b48c..99fcc128d 100644
--- a/src/modules/module_13721.c
+++ b/src/modules/module_13721.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13722.c b/src/modules/module_13722.c
index 201735ad5..17dd5c082 100644
--- a/src/modules/module_13722.c
+++ b/src/modules/module_13722.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13723.c b/src/modules/module_13723.c
index 560707f4f..25bdbe5eb 100644
--- a/src/modules/module_13723.c
+++ b/src/modules/module_13723.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13731.c b/src/modules/module_13731.c
index e1fff1cc3..a0062491b 100644
--- a/src/modules/module_13731.c
+++ b/src/modules/module_13731.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13732.c b/src/modules/module_13732.c
index 89b5970d9..bb6c45321 100644
--- a/src/modules/module_13732.c
+++ b/src/modules/module_13732.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13733.c b/src/modules/module_13733.c
index 9d69d0049..b5be933eb 100644
--- a/src/modules/module_13733.c
+++ b/src/modules/module_13733.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13741.c b/src/modules/module_13741.c
index 3cbccc940..bec68bcf5 100644
--- a/src/modules/module_13741.c
+++ b/src/modules/module_13741.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13742.c b/src/modules/module_13742.c
index 950326d40..27d585271 100644
--- a/src/modules/module_13742.c
+++ b/src/modules/module_13742.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13743.c b/src/modules/module_13743.c
index d317723dc..77f9f2d69 100644
--- a/src/modules/module_13743.c
+++ b/src/modules/module_13743.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c
index 84451ab96..007ef20c6 100644
--- a/src/modules/module_13751.c
+++ b/src/modules/module_13751.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c
index f26d91142..70395556c 100644
--- a/src/modules/module_13752.c
+++ b/src/modules/module_13752.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c
index 7ada76eff..bac3bb312 100644
--- a/src/modules/module_13753.c
+++ b/src/modules/module_13753.c
@@ -136,13 +136,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -320,7 +313,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c
index 8263b33e2..c7845c394 100644
--- a/src/modules/module_13761.c
+++ b/src/modules/module_13761.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c
index c742e2639..2cca29da7 100644
--- a/src/modules/module_13762.c
+++ b/src/modules/module_13762.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c
index 3b8ab3ba5..5f3b4a37e 100644
--- a/src/modules/module_13763.c
+++ b/src/modules/module_13763.c
@@ -137,13 +137,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -321,7 +314,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13771.c b/src/modules/module_13771.c
index 573f4332f..9339bc87d 100644
--- a/src/modules/module_13771.c
+++ b/src/modules/module_13771.c
@@ -140,13 +140,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -324,7 +317,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13772.c b/src/modules/module_13772.c
index 070e37971..b72a010bf 100644
--- a/src/modules/module_13772.c
+++ b/src/modules/module_13772.c
@@ -140,13 +140,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -324,7 +317,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_13773.c b/src/modules/module_13773.c
index c49a6d09c..6820de1bb 100644
--- a/src/modules/module_13773.c
+++ b/src/modules/module_13773.c
@@ -140,13 +140,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_init_selftest (MAYBE_UNUSED const hashconfig_t *hashconfig, hash_t *hash)
 {
   const size_t st_hash_len = strlen (hashconfig->st_hash);
@@ -324,7 +317,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_15900.c b/src/modules/module_15900.c
index e5eb8625b..2b19f5213 100644
--- a/src/modules/module_15900.c
+++ b/src/modules/module_15900.c
@@ -97,13 +97,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   // amdgpu-pro-18.50-708488-ubuntu-18.04: self-test failed
@@ -433,7 +426,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_16600.c b/src/modules/module_16600.c
index 2db263244..909f6c1e9 100644
--- a/src/modules/module_16600.c
+++ b/src/modules/module_16600.c
@@ -60,13 +60,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -233,7 +226,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19000.c b/src/modules/module_19000.c
index 9243e6166..572ebe560 100644
--- a/src/modules/module_19000.c
+++ b/src/modules/module_19000.c
@@ -57,13 +57,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -228,7 +221,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19100.c b/src/modules/module_19100.c
index 7fe5d2181..44f6275b4 100644
--- a/src/modules/module_19100.c
+++ b/src/modules/module_19100.c
@@ -57,13 +57,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -253,7 +246,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19200.c b/src/modules/module_19200.c
index a70ecc299..77a1c80bf 100644
--- a/src/modules/module_19200.c
+++ b/src/modules/module_19200.c
@@ -59,13 +59,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
   char *jit_build_options = NULL;
@@ -255,7 +248,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_19300.c b/src/modules/module_19300.c
index 27a63b4da..4b1da6300 100644
--- a/src/modules/module_19300.c
+++ b/src/modules/module_19300.c
@@ -59,13 +59,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -244,7 +237,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_20011.c b/src/modules/module_20011.c
index 45b75141a..1bcad8cb6 100644
--- a/src/modules/module_20011.c
+++ b/src/modules/module_20011.c
@@ -84,13 +84,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -235,7 +228,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;
diff --git a/src/modules/module_20012.c b/src/modules/module_20012.c
index 302c1f23b..0dd86e76c 100644
--- a/src/modules/module_20012.c
+++ b/src/modules/module_20012.c
@@ -84,13 +84,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -235,7 +228,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;
diff --git a/src/modules/module_20013.c b/src/modules/module_20013.c
index 535e0922b..4a7fb4bc8 100644
--- a/src/modules/module_20013.c
+++ b/src/modules/module_20013.c
@@ -84,13 +84,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance
-
-  return kernel_threads_max;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -235,7 +228,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max      = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
   module_ctx->module_kern_type               = module_kern_type;
   module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;

From 501ed6efec9bbcb598a2ef82243aa4b54e3fbbd8 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 12:23:18 +0200
Subject: [PATCH 63/73] Update some entries in hashcat.hctune

---
 hashcat.hctune | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/hashcat.hctune b/hashcat.hctune
index caf16bfd9..c746e2de1 100644
--- a/hashcat.hctune
+++ b/hashcat.hctune
@@ -316,22 +316,22 @@ ALIAS_nv_sm50_or_higher                         3       0       8       A
 ALIAS_nv_sm50_or_higher                         3       10      8       A       A
 ALIAS_nv_sm50_or_higher                         3       11      8       A       A
 ALIAS_nv_sm50_or_higher                         3       12      8       A       A
-ALIAS_nv_sm50_or_higher                         3       20      8       A       A
-ALIAS_nv_sm50_or_higher                         3       21      8       A       A
-ALIAS_nv_sm50_or_higher                         3       22      8       A       A
-ALIAS_nv_sm50_or_higher                         3       23      8       A       A
-ALIAS_nv_sm50_or_higher                         3       30      8       A       A
-ALIAS_nv_sm50_or_higher                         3       40      8       A       A
+ALIAS_nv_sm50_or_higher                         3       20      4       A       A
+ALIAS_nv_sm50_or_higher                         3       21      4       A       A
+ALIAS_nv_sm50_or_higher                         3       22      4       A       A
+ALIAS_nv_sm50_or_higher                         3       23      4       A       A
+ALIAS_nv_sm50_or_higher                         3       30      4       A       A
+ALIAS_nv_sm50_or_higher                         3       40      4       A       A
 ALIAS_nv_sm50_or_higher                         3       200     8       A       A
 ALIAS_nv_sm50_or_higher                         3       900     8       A       A
 ALIAS_nv_sm50_or_higher                         3       1000    8       A       A
-ALIAS_nv_sm50_or_higher                         3       1100    8       A       A
+ALIAS_nv_sm50_or_higher                         3       1100    4       A       A
 ALIAS_nv_sm50_or_higher                         3       2400    8       A       A
-ALIAS_nv_sm50_or_higher                         3       2410    8       A       A
-ALIAS_nv_sm50_or_higher                         3       3800    8       A       A
+ALIAS_nv_sm50_or_higher                         3       2410    4       A       A
+ALIAS_nv_sm50_or_higher                         3       3800    4       A       A
 ALIAS_nv_sm50_or_higher                         3       4800    8       A       A
-ALIAS_nv_sm50_or_higher                         3       5500    8       A       A
-ALIAS_nv_sm50_or_higher                         3       9900    8       A       A
+ALIAS_nv_sm50_or_higher                         3       5500    2       A       A
+ALIAS_nv_sm50_or_higher                         3       9900    4       A       A
 ALIAS_nv_sm50_or_higher                         3       16400   8       A       A
 ALIAS_nv_sm50_or_higher                         3       18700   8       A       A
 

From aa8e000d3129380c61a2a657e698c547bf3e857e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 14:22:44 +0200
Subject: [PATCH 64/73] Flag OPTS_TYPE_PREFERED_THREAD no longer needed

---
 src/modules/module_00500.c | 3 +--
 src/modules/module_00501.c | 1 -
 src/modules/module_01600.c | 3 +--
 src/modules/module_01800.c | 3 +--
 src/modules/module_06300.c | 3 +--
 src/modules/module_13400.c | 3 +--
 src/modules/module_15300.c | 3 +--
 7 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/modules/module_00500.c b/src/modules/module_00500.c
index 59f67cb83..aef6bd8be 100644
--- a/src/modules/module_00500.c
+++ b/src/modules/module_00500.c
@@ -20,8 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "md5crypt, MD5 (Unix), Cisco-IOS $1$ (MD5)";
 static const u64   KERN_TYPE      = 500;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$1$38652870$DUjsu4TTlTsOe/xxZ05uf/";
diff --git a/src/modules/module_00501.c b/src/modules/module_00501.c
index 1e5935523..4eab9da57 100644
--- a/src/modules/module_00501.c
+++ b/src/modules/module_00501.c
@@ -22,7 +22,6 @@ static const char *HASH_NAME      = "Juniper IVE";
 static const u64   KERN_TYPE      = 500;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD
                                   | OPTS_TYPE_HASH_COPY;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
diff --git a/src/modules/module_01600.c b/src/modules/module_01600.c
index fd84d3f5e..69c62d0b1 100644
--- a/src/modules/module_01600.c
+++ b/src/modules/module_01600.c
@@ -20,8 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_SERVER;
 static const char *HASH_NAME      = "Apache $apr1$ MD5, md5apr1, MD5 (APR)";
 static const u64   KERN_TYPE      = 1600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$apr1$62722340$zGjeAwVP2KwY6MtumUI1N/";
diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c
index 2dc77c887..e10175604 100644
--- a/src/modules/module_01800.c
+++ b/src/modules/module_01800.c
@@ -21,8 +21,7 @@ static const char *HASH_NAME      = "sha512crypt $6$, SHA512 (Unix)";
 static const u64   KERN_TYPE      = 1800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_USES_BITS_64;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$6$72820166$U4DVzpcYxgw7MVVDGGvB2/H5lRistD5.Ah4upwENR5UtffLR4X4SxSzfREv8z6wVl0jRFX40/KnYVvK4829kD1";
diff --git a/src/modules/module_06300.c b/src/modules/module_06300.c
index 05bd72b93..2d3fd3b34 100644
--- a/src/modules/module_06300.c
+++ b/src/modules/module_06300.c
@@ -20,8 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "AIX {smd5}";
 static const u64   KERN_TYPE      = 6300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "{smd5}17800721$WkGka7tXcrfpUQS6WOQyw/";
diff --git a/src/modules/module_13400.c b/src/modules/module_13400.c
index 1d8c4f4ab..c251e1188 100644
--- a/src/modules/module_13400.c
+++ b/src/modules/module_13400.c
@@ -20,8 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "KeePass 1 (AES/Twofish) and KeePass 2 (AES)";
 static const u64   KERN_TYPE      = 13400;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$keepass$*2*24569*0*c40432355cce7348c48053ceea0a28e7d18859c4ea47e3a799c6300861f64b95*265dafcc42e1537ff42e97e1e283c70014133be0fe2d420b4d24c6d57c9d2207*a00e20a852694c15aabb074d61b902fa*48dd553fb96f7996635f2414bfe6a1a8429ef0ffb71a1752abbef31853172c35*a44ae659958ad7fae8c8952cb83f3cf03fec2371ce22a8bf7fac1e687af2f249*1*64*5a26ea376cc5afc955104c334571d30486acbac512a94b75ca82a9e31dd97bf7";
diff --git a/src/modules/module_15300.c b/src/modules/module_15300.c
index 04800456e..e14499356 100644
--- a/src/modules/module_15300.c
+++ b/src/modules/module_15300.c
@@ -22,8 +22,7 @@ static const char *HASH_NAME      = "DPAPI masterkey file v1";
 static const u64   KERN_TYPE      = 15300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_PREFERED_THREAD;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$DPAPImk$1*1*S-15-21-466364039-425773974-453930460-1925*des3*sha1*24000*b038489dee5ad04e3e3cab4d957258b5*208*cb9b5b7d96a0d2a00305ca403d3fd9c47c561e35b4b2cf3aebfd1d3199a6481d56972be7ebd6c291b199e6f1c2ffaee91978706737e9b1209e6c7d3aa3d8c3c3e38ad1ccfa39400d62c2415961c17fd0bd6b0f7bbd49cc1de1a394e64b7237f56244238da8d37d78";

From 3ca3d1cc60214b34408fe3a9d2fc10923a93f149 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 14:34:10 +0200
Subject: [PATCH 65/73] Fix kernel_rules variable name

---
 OpenCL/inc_rp_optimized.cl | 6 +++---
 OpenCL/inc_rp_optimized.h  | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl
index 8bfbe8b0a..6a21bd688 100644
--- a/OpenCL/inc_rp_optimized.cl
+++ b/OpenCL/inc_rp_optimized.cl
@@ -2355,7 +2355,7 @@ DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, u32 *buf0, u32
   return out_len;
 }
 
-DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1, const u32 pw_len, CONSTANT_AS const kernel_rule_t *rules_buf, const u32 il_pos, u32x *buf0, u32x *buf1)
+DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1, const u32 pw_len, CONSTANT_AS const kernel_rule_t *kernel_rules, const u32 il_pos, u32x *buf0, u32x *buf1)
 {
   #if VECT_SIZE == 1
 
@@ -2368,7 +2368,7 @@ DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1
   buf1[2] = pw_buf1[2];
   buf1[3] = pw_buf1[3];
 
-  return apply_rules_optimized (rules_buf[il_pos].cmds, buf0, buf1, pw_len);
+  return apply_rules_optimized (kernel_rules[il_pos].cmds, buf0, buf1, pw_len);
 
   #else
 
@@ -2391,7 +2391,7 @@ DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1
     tmp1[2] = pw_buf1[2];
     tmp1[3] = pw_buf1[3];
 
-    const u32 tmp_len = apply_rules_optimized (rules_buf[il_pos + i].cmds, tmp0, tmp1, pw_len);
+    const u32 tmp_len = apply_rules_optimized (kernel_rules[il_pos + i].cmds, tmp0, tmp1, pw_len);
 
     switch (i)
     {
diff --git a/OpenCL/inc_rp_optimized.h b/OpenCL/inc_rp_optimized.h
index c52113da0..b6a133086 100644
--- a/OpenCL/inc_rp_optimized.h
+++ b/OpenCL/inc_rp_optimized.h
@@ -123,6 +123,6 @@ DECLSPEC u32 toggle_on_register (const u32 in, const u32 r);
 DECLSPEC u32 rule_op_mangle_title_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u32 *buf0, u32 *buf1, const u32 in_len);
 DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, u32 *buf0, u32 *buf1, const u32 len);
-DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1, const u32 pw_len, CONSTANT_AS const kernel_rule_t *rules_buf, const u32 il_pos, u32x *buf0, u32x *buf1);
+DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1, const u32 pw_len, CONSTANT_AS const kernel_rule_t *kernel_rules, const u32 il_pos, u32x *buf0, u32x *buf1);
 
 #endif // _INC_RP_OPTIMIZED_H

From fa9d073f9a2a3898c2322dd8f4f3fda3a570b8fd Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 11 May 2019 23:15:58 +0200
Subject: [PATCH 66/73] Manually unroll sha2 hashes

---
 OpenCL/inc_hash_sha224.cl     |  6 ++++++
 OpenCL/inc_hash_sha256.cl     |  6 ++++++
 OpenCL/inc_hash_sha384.cl     |  7 +++++++
 OpenCL/inc_hash_sha512.cl     |  7 +++++++
 OpenCL/m01700_a0-optimized.cl |  7 +++++++
 OpenCL/m01700_a1-optimized.cl |  7 +++++++
 OpenCL/m01700_a3-optimized.cl |  7 +++++++
 OpenCL/m01710_a0-optimized.cl |  7 +++++++
 OpenCL/m01710_a1-optimized.cl |  7 +++++++
 OpenCL/m01710_a3-optimized.cl |  7 +++++++
 OpenCL/m01720_a0-optimized.cl |  7 +++++++
 OpenCL/m01720_a1-optimized.cl |  7 +++++++
 OpenCL/m01720_a3-optimized.cl |  7 +++++++
 OpenCL/m01730_a0-optimized.cl |  7 +++++++
 OpenCL/m01730_a1-optimized.cl |  7 +++++++
 OpenCL/m01730_a3-optimized.cl |  7 +++++++
 OpenCL/m01740_a0-optimized.cl |  7 +++++++
 OpenCL/m01740_a1-optimized.cl |  7 +++++++
 OpenCL/m01740_a3-optimized.cl |  7 +++++++
 OpenCL/m08000_a0-optimized.cl | 12 ++++++++++++
 OpenCL/m08000_a1-optimized.cl | 12 ++++++++++++
 OpenCL/m08000_a3-optimized.cl | 12 ++++++++++++
 OpenCL/m10800_a0-optimized.cl |  7 +++++++
 OpenCL/m10800_a1-optimized.cl |  7 +++++++
 OpenCL/m10800_a3-optimized.cl |  7 +++++++
 OpenCL/m15000_a0-optimized.cl |  7 +++++++
 OpenCL/m15000_a1-optimized.cl |  7 +++++++
 OpenCL/m15000_a3-optimized.cl |  7 +++++++
 src/autotune.c                | 16 ++++++++++++++++
 29 files changed, 225 insertions(+)

diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index e93206e44..e3f5b4d1c 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -104,6 +104,11 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND_S (); ROUND_STEP_S (16);
+  ROUND_EXPAND_S (); ROUND_STEP_S (32);
+  ROUND_EXPAND_S (); ROUND_STEP_S (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -111,6 +116,7 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
+  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index de2bd5897..d6d0a69b0 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -104,6 +104,11 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND_S (); ROUND_STEP_S (16);
+  ROUND_EXPAND_S (); ROUND_STEP_S (32);
+  ROUND_EXPAND_S (); ROUND_STEP_S (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -111,6 +116,7 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
+  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index ea26ec734..61e2f5d3a 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -108,6 +108,12 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND_S (); ROUND_STEP_S (16);
+  ROUND_EXPAND_S (); ROUND_STEP_S (32);
+  ROUND_EXPAND_S (); ROUND_STEP_S (48);
+  ROUND_EXPAND_S (); ROUND_STEP_S (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -115,6 +121,7 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
+  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index 783a66fbe..6dc91f368 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -108,6 +108,12 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND_S (); ROUND_STEP_S (16);
+  ROUND_EXPAND_S (); ROUND_STEP_S (32);
+  ROUND_EXPAND_S (); ROUND_STEP_S (48);
+  ROUND_EXPAND_S (); ROUND_STEP_S (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -115,6 +121,7 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
+  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl
index bd71b1678..0d6ddb337 100644
--- a/OpenCL/m01700_a0-optimized.cl
+++ b/OpenCL/m01700_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl
index 2716dbdec..abee4dfcb 100644
--- a/OpenCL/m01700_a1-optimized.cl
+++ b/OpenCL/m01700_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl
index 5e2820592..c4d8ee016 100644
--- a/OpenCL/m01700_a3-optimized.cl
+++ b/OpenCL/m01700_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01710_a0-optimized.cl b/OpenCL/m01710_a0-optimized.cl
index 6f3130ac9..a5a53e831 100644
--- a/OpenCL/m01710_a0-optimized.cl
+++ b/OpenCL/m01710_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl
index 8d8a87380..931142cae 100644
--- a/OpenCL/m01710_a1-optimized.cl
+++ b/OpenCL/m01710_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl
index 353d1395b..a82f949ac 100644
--- a/OpenCL/m01710_a3-optimized.cl
+++ b/OpenCL/m01710_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl
index 14503946d..c331365f1 100644
--- a/OpenCL/m01720_a0-optimized.cl
+++ b/OpenCL/m01720_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl
index fbacd956b..aa93dc2c9 100644
--- a/OpenCL/m01720_a1-optimized.cl
+++ b/OpenCL/m01720_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl
index 168cb7f96..891634dd4 100644
--- a/OpenCL/m01720_a3-optimized.cl
+++ b/OpenCL/m01720_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl
index 2d1d4d6bc..f5da15e7f 100644
--- a/OpenCL/m01730_a0-optimized.cl
+++ b/OpenCL/m01730_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl
index 7515d2cc0..f3cd8d89a 100644
--- a/OpenCL/m01730_a1-optimized.cl
+++ b/OpenCL/m01730_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl
index d2f01afd8..e00e5f4ae 100644
--- a/OpenCL/m01730_a3-optimized.cl
+++ b/OpenCL/m01730_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl
index 717aba0c0..ee38662e8 100644
--- a/OpenCL/m01740_a0-optimized.cl
+++ b/OpenCL/m01740_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl
index aa765e337..0ae6984e0 100644
--- a/OpenCL/m01740_a1-optimized.cl
+++ b/OpenCL/m01740_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl
index 8b6322f85..4b7b1d3df 100644
--- a/OpenCL/m01740_a3-optimized.cl
+++ b/OpenCL/m01740_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl
index f259ea250..310bebbeb 100644
--- a/OpenCL/m08000_a0-optimized.cl
+++ b/OpenCL/m08000_a0-optimized.cl
@@ -86,6 +86,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +98,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
@@ -137,6 +143,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
+  #ifdef IS_CUDA
+  ROUND_STEP_Z (16);
+  ROUND_STEP_Z (32);
+  ROUND_STEP_Z (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -144,6 +155,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
   {
     ROUND_STEP_Z (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl
index 599364f44..89ea42a57 100644
--- a/OpenCL/m08000_a1-optimized.cl
+++ b/OpenCL/m08000_a1-optimized.cl
@@ -84,6 +84,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +96,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
@@ -135,6 +141,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
+  #ifdef IS_CUDA
+  ROUND_STEP_Z (16);
+  ROUND_STEP_Z (32);
+  ROUND_STEP_Z (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -142,6 +153,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
   {
     ROUND_STEP_Z (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl
index 3068a6621..fa76a3b72 100644
--- a/OpenCL/m08000_a3-optimized.cl
+++ b/OpenCL/m08000_a3-optimized.cl
@@ -84,6 +84,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +96,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
@@ -135,6 +141,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
 
   ROUND_STEP_Z (0);
 
+  #ifdef IS_CUDA
+  ROUND_STEP_Z (16);
+  ROUND_STEP_Z (32);
+  ROUND_STEP_Z (48);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -142,6 +153,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
   {
     ROUND_STEP_Z (i);
   }
+  #endif
 
   digest[0] += a;
   digest[1] += b;
diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl
index e9cfd2167..396b389a6 100644
--- a/OpenCL/m10800_a0-optimized.cl
+++ b/OpenCL/m10800_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m10800_a1-optimized.cl b/OpenCL/m10800_a1-optimized.cl
index f7828aa9f..11aa95dbd 100644
--- a/OpenCL/m10800_a1-optimized.cl
+++ b/OpenCL/m10800_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl
index f19e1b224..cef22d51f 100644
--- a/OpenCL/m10800_a3-optimized.cl
+++ b/OpenCL/m10800_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m15000_a0-optimized.cl b/OpenCL/m15000_a0-optimized.cl
index a4fe67e48..7b73564a3 100644
--- a/OpenCL/m15000_a0-optimized.cl
+++ b/OpenCL/m15000_a0-optimized.cl
@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m15000_a1-optimized.cl b/OpenCL/m15000_a1-optimized.cl
index e410b3102..1bcd7a983 100644
--- a/OpenCL/m15000_a1-optimized.cl
+++ b/OpenCL/m15000_a1-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m15000_a3-optimized.cl b/OpenCL/m15000_a3-optimized.cl
index 5ff6d7bfb..cf1ff1432 100644
--- a/OpenCL/m15000_a3-optimized.cl
+++ b/OpenCL/m15000_a3-optimized.cl
@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16);
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
+  #endif
 
   /* rev
   digest[0] += a;
diff --git a/src/autotune.c b/src/autotune.c
index 43b5b46bb..11dc8c1d1 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -47,6 +47,7 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   return exec_msec_prev;
 }
 
+/*
 static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
 {
   hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
@@ -93,6 +94,7 @@ static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *
 
   return exec_msec_prev;
 }
+*/
 
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
@@ -261,6 +263,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     const u32 kernel_accel_orig = kernel_accel;
     const u32 kernel_loops_orig = kernel_loops;
 
+    double exec_msec_prev = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+
     for (int i = 1; i < STEPS_CNT; i++)
     {
       const u32 kernel_accel_try = kernel_accel_orig * (1u << i);
@@ -272,6 +276,16 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (kernel_loops_try > kernel_loops_max) continue;
       if (kernel_loops_try < kernel_loops_min) break;
 
+      // do a real test
+
+      const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
+
+      if (exec_msec_prev < exec_msec) break;
+
+      exec_msec_prev = exec_msec;
+
+      // so far, so good! save
+
       kernel_accel = kernel_accel_try;
       kernel_loops = kernel_loops_try;
 
@@ -299,6 +313,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   // start finding best thread count is easier.
   // it's either the preferred or the maximum thread count
 
+  /*
   const u32 kernel_threads_min = device_param->kernel_threads_min;
   const u32 kernel_threads_max = device_param->kernel_threads_max;
 
@@ -334,6 +349,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       }
     }
   }
+  */
 
   if (device_param->is_cuda == true)
   {

From e2da5c8d57427b230316b299897f7c33f63f90f3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 12 May 2019 12:38:23 +0200
Subject: [PATCH 67/73] Some unrolling for SHA2 based algorithms

---
 OpenCL/inc_hash_sha224.cl     | 6 ------
 OpenCL/inc_hash_sha256.cl     | 6 ------
 OpenCL/inc_hash_sha384.cl     | 7 -------
 OpenCL/inc_hash_sha512.cl     | 7 -------
 OpenCL/m15000_a0-optimized.cl | 7 -------
 OpenCL/m15000_a1-optimized.cl | 7 -------
 OpenCL/m15000_a3-optimized.cl | 7 -------
 7 files changed, 47 deletions(-)

diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl
index e3f5b4d1c..e93206e44 100644
--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@@ -104,11 +104,6 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND_S (); ROUND_STEP_S (16);
-  ROUND_EXPAND_S (); ROUND_STEP_S (32);
-  ROUND_EXPAND_S (); ROUND_STEP_S (48);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -116,7 +111,6 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
-  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index d6d0a69b0..de2bd5897 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -104,11 +104,6 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND_S (); ROUND_STEP_S (16);
-  ROUND_EXPAND_S (); ROUND_STEP_S (32);
-  ROUND_EXPAND_S (); ROUND_STEP_S (48);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -116,7 +111,6 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
-  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index 61e2f5d3a..ea26ec734 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -108,12 +108,6 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND_S (); ROUND_STEP_S (16);
-  ROUND_EXPAND_S (); ROUND_STEP_S (32);
-  ROUND_EXPAND_S (); ROUND_STEP_S (48);
-  ROUND_EXPAND_S (); ROUND_STEP_S (64);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -121,7 +115,6 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
-  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index 6dc91f368..783a66fbe 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -108,12 +108,6 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
 
   ROUND_STEP_S (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND_S (); ROUND_STEP_S (16);
-  ROUND_EXPAND_S (); ROUND_STEP_S (32);
-  ROUND_EXPAND_S (); ROUND_STEP_S (48);
-  ROUND_EXPAND_S (); ROUND_STEP_S (64);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -121,7 +115,6 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
   {
     ROUND_EXPAND_S (); ROUND_STEP_S (i);
   }
-  #endif
 
   #undef ROUND_EXPAND_S
   #undef ROUND_STEP_S
diff --git a/OpenCL/m15000_a0-optimized.cl b/OpenCL/m15000_a0-optimized.cl
index 7b73564a3..a4fe67e48 100644
--- a/OpenCL/m15000_a0-optimized.cl
+++ b/OpenCL/m15000_a0-optimized.cl
@@ -86,12 +86,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND (); ROUND_STEP (16);
-  ROUND_EXPAND (); ROUND_STEP (32);
-  ROUND_EXPAND (); ROUND_STEP (48);
-  ROUND_EXPAND (); ROUND_STEP (64);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -99,7 +93,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
-  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m15000_a1-optimized.cl b/OpenCL/m15000_a1-optimized.cl
index 1bcd7a983..e410b3102 100644
--- a/OpenCL/m15000_a1-optimized.cl
+++ b/OpenCL/m15000_a1-optimized.cl
@@ -84,12 +84,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND (); ROUND_STEP (16);
-  ROUND_EXPAND (); ROUND_STEP (32);
-  ROUND_EXPAND (); ROUND_STEP (48);
-  ROUND_EXPAND (); ROUND_STEP (64);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -97,7 +91,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
-  #endif
 
   /* rev
   digest[0] += a;
diff --git a/OpenCL/m15000_a3-optimized.cl b/OpenCL/m15000_a3-optimized.cl
index cf1ff1432..5ff6d7bfb 100644
--- a/OpenCL/m15000_a3-optimized.cl
+++ b/OpenCL/m15000_a3-optimized.cl
@@ -84,12 +84,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
 
   ROUND_STEP (0);
 
-  #ifdef IS_CUDA
-  ROUND_EXPAND (); ROUND_STEP (16);
-  ROUND_EXPAND (); ROUND_STEP (32);
-  ROUND_EXPAND (); ROUND_STEP (48);
-  ROUND_EXPAND (); ROUND_STEP (64);
-  #else
   #ifdef _unroll
   #pragma unroll
   #endif
@@ -97,7 +91,6 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
   {
     ROUND_EXPAND (); ROUND_STEP (i);
   }
-  #endif
 
   /* rev
   digest[0] += a;

From c07f9c19c73a2c62906c82b3097359b7781bb128 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 13 May 2019 13:04:59 +0200
Subject: [PATCH 68/73] Reorder the TC/VC/DC header checks

---
 OpenCL/inc_cipher_des.cl        | 18 +++++++++++++
 OpenCL/inc_cipher_kuznyechik.cl | 23 +++++++++++-----
 OpenCL/inc_cipher_twofish.cl    |  6 +++++
 OpenCL/inc_diskcryptor_xts.cl   | 14 ++++++++++
 OpenCL/m06211-pure.cl           | 16 +++++------
 OpenCL/m06212-pure.cl           | 32 +++++++++++-----------
 OpenCL/m06213-pure.cl           | 48 ++++++++++++++++-----------------
 OpenCL/m06221-pure.cl           | 16 +++++------
 OpenCL/m06222-pure.cl           | 32 +++++++++++-----------
 OpenCL/m06223-pure.cl           | 48 ++++++++++++++++-----------------
 OpenCL/m06231-pure.cl           | 16 +++++------
 OpenCL/m06232-pure.cl           | 32 +++++++++++-----------
 OpenCL/m06233-pure.cl           | 48 ++++++++++++++++-----------------
 OpenCL/m13711-pure.cl           |  2 +-
 OpenCL/m13712-pure.cl           |  6 ++---
 OpenCL/m13713-pure.cl           |  8 +++---
 OpenCL/m13721-pure.cl           |  2 +-
 OpenCL/m13722-pure.cl           |  6 ++---
 OpenCL/m13723-pure.cl           |  8 +++---
 OpenCL/m13731-pure.cl           |  2 +-
 OpenCL/m13732-pure.cl           |  6 ++---
 OpenCL/m13733-pure.cl           |  8 +++---
 OpenCL/m13751-pure.cl           |  2 +-
 OpenCL/m13752-pure.cl           |  6 ++---
 OpenCL/m13753-pure.cl           |  8 +++---
 OpenCL/m13771-pure.cl           |  2 +-
 OpenCL/m13772-pure.cl           |  6 ++---
 OpenCL/m13773-pure.cl           |  8 +++---
 OpenCL/m20011-pure.cl           | 16 +++++------
 OpenCL/m20012-pure.cl           | 32 +++++++++++-----------
 OpenCL/m20013-pure.cl           | 48 ++++++++++++++++-----------------
 31 files changed, 287 insertions(+), 238 deletions(-)

diff --git a/OpenCL/inc_cipher_des.cl b/OpenCL/inc_cipher_des.cl
index b247e1e22..6c2e7fca8 100644
--- a/OpenCL/inc_cipher_des.cl
+++ b/OpenCL/inc_cipher_des.cl
@@ -323,6 +323,9 @@ DECLSPEC void _des_crypt_encrypt (u32 *out, const u32 *in, const u32 *Kc, const
   r = hc_rotl32_S (r, 3u);
   l = hc_rotl32_S (l, 3u);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 0; i < 16; i += 2)
   {
     u32 u;
@@ -372,6 +375,9 @@ DECLSPEC void _des_crypt_decrypt (u32 *out, const u32 *in, const u32 *Kc, const
   r = hc_rotl32_S (r, 3u);
   l = hc_rotl32_S (l, 3u);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 16; i > 0; i -= 2)
   {
     u32 u;
@@ -427,6 +433,9 @@ DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, SHM_TYPE u32
 
   c = c & 0x0fffffff;
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 0; i < 16; i++)
   {
     if ((i < 2) || (i == 8) || (i == 15))
@@ -488,6 +497,9 @@ DECLSPEC void _des_crypt_encrypt_vect (u32x *out, const u32x *in, const u32x *Kc
   r = hc_rotl32 (r, 3u);
   l = hc_rotl32 (l, 3u);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 0; i < 16; i += 2)
   {
     u32x u;
@@ -537,6 +549,9 @@ DECLSPEC void _des_crypt_decrypt_vect (u32x *out, const u32x *in, const u32x *Kc
   r = hc_rotl32 (r, 3u);
   l = hc_rotl32 (l, 3u);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 16; i > 0; i -= 2)
   {
     u32x u;
@@ -592,6 +607,9 @@ DECLSPEC void _des_crypt_keysetup_vect (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_
 
   c = c & 0x0fffffff;
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (u32 i = 0; i < 16; i++)
   {
     if ((i < 2) || (i == 8) || (i == 15))
diff --git a/OpenCL/inc_cipher_kuznyechik.cl b/OpenCL/inc_cipher_kuznyechik.cl
index a5768f70f..3d9638e23 100644
--- a/OpenCL/inc_cipher_kuznyechik.cl
+++ b/OpenCL/inc_cipher_kuznyechik.cl
@@ -92,12 +92,23 @@ CONSTANT_VK u32a k_sbox_inv[256] =
 
 #define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
 
-#define k_lookup(w,sbox)                      \
-  for (int i = 0; i < 4; i++)                 \
-    w[i] = sbox[extract_byte (w[i], 0)] <<  0 \
-         | sbox[extract_byte (w[i], 1)] <<  8 \
-         | sbox[extract_byte (w[i], 2)] << 16 \
-         | sbox[extract_byte (w[i], 3)] << 24
+#define k_lookup(w,sbox)                        \
+    w[0] = sbox[extract_byte (w[0], 0)] <<  0   \
+         | sbox[extract_byte (w[0], 1)] <<  8   \
+         | sbox[extract_byte (w[0], 2)] << 16   \
+         | sbox[extract_byte (w[0], 3)] << 24;  \
+    w[1] = sbox[extract_byte (w[1], 0)] <<  0   \
+         | sbox[extract_byte (w[1], 1)] <<  8   \
+         | sbox[extract_byte (w[1], 2)] << 16   \
+         | sbox[extract_byte (w[1], 3)] << 24;  \
+    w[2] = sbox[extract_byte (w[2], 0)] <<  0   \
+         | sbox[extract_byte (w[2], 1)] <<  8   \
+         | sbox[extract_byte (w[2], 2)] << 16   \
+         | sbox[extract_byte (w[2], 3)] << 24;  \
+    w[3] = sbox[extract_byte (w[3], 0)] <<  0   \
+         | sbox[extract_byte (w[3], 1)] <<  8   \
+         | sbox[extract_byte (w[3], 2)] << 16   \
+         | sbox[extract_byte (w[3], 3)] << 24;
 
 #define k_xor(n)                      \
   for (int i = (n); i > 0; i /= 2)    \
diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl
index dc0461b24..5ba55e341 100644
--- a/OpenCL/inc_cipher_twofish.cl
+++ b/OpenCL/inc_cipher_twofish.cl
@@ -393,6 +393,9 @@ DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey)
   sk[1] = mds_rem (me_key[0], mo_key[0]);
   sk[0] = mds_rem (me_key[1], mo_key[1]);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (int i = 0; i < 40; i += 2)
   {
     u32 a = 0x01010101 * i;
@@ -518,6 +521,9 @@ DECLSPEC void twofish256_set_key (u32 *sk, u32 *lk, const u32 *ukey)
   sk[1] = mds_rem (me_key[2], mo_key[2]);
   sk[0] = mds_rem (me_key[3], mo_key[3]);
 
+  #ifdef _unroll
+  #pragma unroll
+  #endif
   for (int i = 0; i < 40; i += 2)
   {
     u32 a = 0x01010101 * i;
diff --git a/OpenCL/inc_diskcryptor_xts.cl b/OpenCL/inc_diskcryptor_xts.cl
index 2abfdd21d..e643e879f 100644
--- a/OpenCL/inc_diskcryptor_xts.cl
+++ b/OpenCL/inc_diskcryptor_xts.cl
@@ -1,3 +1,17 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+#include "inc_common.h"
+#include "inc_cipher_aes.h"
+#include "inc_cipher_serpent.h"
+#include "inc_cipher_twofish.h"
+#include "inc_diskcryptor_xts.h"
+
 DECLSPEC void dcrp_xts_mul2 (u32 *in, u32 *out)
 {
   const u32 c = in[3] >> 31;
diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl
index 3a857dec9..62e9dd236 100644
--- a/OpenCL/m06211-pure.cl
+++ b/OpenCL/m06211-pure.cl
@@ -374,14 +374,6 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = tmps[gid].out[14];
   ukey2[7] = tmps[gid].out[15];
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -397,4 +389,12 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl
index 1d4d58bf5..f8b2665b7 100644
--- a/OpenCL/m06212-pure.cl
+++ b/OpenCL/m06212-pure.cl
@@ -374,14 +374,6 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = tmps[gid].out[14];
   ukey2[7] = tmps[gid].out[15];
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -398,6 +390,14 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = tmps[gid].out[16];
@@ -420,14 +420,6 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey4[6] = tmps[gid].out[30];
   ukey4[7] = tmps[gid].out[31];
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -443,4 +435,12 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl
index 1be0dca74..310e6fe40 100644
--- a/OpenCL/m06213-pure.cl
+++ b/OpenCL/m06213-pure.cl
@@ -374,14 +374,6 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = tmps[gid].out[14];
   ukey2[7] = tmps[gid].out[15];
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -398,6 +390,14 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = tmps[gid].out[16];
@@ -420,14 +420,6 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey4[6] = tmps[gid].out[30];
   ukey4[7] = tmps[gid].out[31];
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -444,6 +436,14 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey5[8];
 
   ukey5[0] = tmps[gid].out[32];
@@ -466,14 +466,6 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey6[6] = tmps[gid].out[46];
   ukey6[7] = tmps[gid].out[47];
 
-  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -481,4 +473,12 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl
index c15153972..83bead7bf 100644
--- a/OpenCL/m06221-pure.cl
+++ b/OpenCL/m06221-pure.cl
@@ -518,14 +518,6 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -541,4 +533,12 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl
index 4a71b9078..e243eedce 100644
--- a/OpenCL/m06222-pure.cl
+++ b/OpenCL/m06222-pure.cl
@@ -518,14 +518,6 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -542,6 +534,14 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
@@ -564,14 +564,6 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15]));
   ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15]));
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -587,4 +579,12 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl
index 035266e98..58591daa3 100644
--- a/OpenCL/m06223-pure.cl
+++ b/OpenCL/m06223-pure.cl
@@ -518,14 +518,6 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -542,6 +534,14 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
@@ -564,14 +564,6 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15]));
   ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15]));
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -588,6 +580,14 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey5[8];
 
   ukey5[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[16]));
@@ -610,14 +610,6 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
   ukey6[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[23]));
   ukey6[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[23]));
 
-  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -625,4 +617,12 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl
index 0d4c72c63..a25207ee9 100644
--- a/OpenCL/m06231-pure.cl
+++ b/OpenCL/m06231-pure.cl
@@ -691,14 +691,6 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (tmps[gid].out[14]);
   ukey2[7] = hc_swap32_S (tmps[gid].out[15]);
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -714,4 +706,12 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl
index 803f61715..8569df4c1 100644
--- a/OpenCL/m06232-pure.cl
+++ b/OpenCL/m06232-pure.cl
@@ -691,14 +691,6 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (tmps[gid].out[14]);
   ukey2[7] = hc_swap32_S (tmps[gid].out[15]);
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -715,6 +707,14 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (tmps[gid].out[16]);
@@ -737,14 +737,6 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey4[6] = hc_swap32_S (tmps[gid].out[30]);
   ukey4[7] = hc_swap32_S (tmps[gid].out[31]);
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -760,4 +752,12 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl
index 32553633e..f996cf6f0 100644
--- a/OpenCL/m06233-pure.cl
+++ b/OpenCL/m06233-pure.cl
@@ -691,14 +691,6 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey2[6] = hc_swap32_S (tmps[gid].out[14]);
   ukey2[7] = hc_swap32_S (tmps[gid].out[15]);
 
-  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -715,6 +707,14 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (tmps[gid].out[16]);
@@ -737,14 +737,6 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey4[6] = hc_swap32_S (tmps[gid].out[30]);
   ukey4[7] = hc_swap32_S (tmps[gid].out[31]);
 
-  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -761,6 +753,14 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
     }
   }
 
+  if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
+
   u32 ukey5[8];
 
   ukey5[0] = hc_swap32_S (tmps[gid].out[32]);
@@ -783,14 +783,6 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
   ukey6[6] = hc_swap32_S (tmps[gid].out[46]);
   ukey6[7] = hc_swap32_S (tmps[gid].out[47]);
 
-  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[0]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
-    }
-  }
-
   if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[0]) == 0)
@@ -798,4 +790,12 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
     }
   }
+
+  if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[0]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl
index 7f2551090..a74b318a6 100644
--- a/OpenCL/m13711-pure.cl
+++ b/OpenCL/m13711-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = key[14];
   key2[7] = key[15];
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl
index 0de2c68b1..d754ae7df 100644
--- a/OpenCL/m13712-pure.cl
+++ b/OpenCL/m13712-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = key[14];
   key2[7] = key[15];
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = key[30];
   key4[7] = key[31];
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl
index 75384ffe8..db6dc593f 100644
--- a/OpenCL/m13713-pure.cl
+++ b/OpenCL/m13713-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = key[14];
   key2[7] = key[15];
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = key[30];
   key4[7] = key[31];
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -195,9 +195,9 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key6[6] = key[46];
   key6[7] = key[47];
 
-  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_twofish_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_serpent_camellia (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6) == 1) return 0;
+  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl
index 660c2cc72..3dbabb69d 100644
--- a/OpenCL/m13721-pure.cl
+++ b/OpenCL/m13721-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[7]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[7]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl
index 32a3ff5f4..9b74f8d41 100644
--- a/OpenCL/m13722-pure.cl
+++ b/OpenCL/m13722-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[7]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[7]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key4[6] = hc_swap32_S (h32_from_64_S (key[15]));
   key4[7] = hc_swap32_S (l32_from_64_S (key[15]));
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl
index c8d036a19..ec722c387 100644
--- a/OpenCL/m13723-pure.cl
+++ b/OpenCL/m13723-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[7]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[7]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key4[6] = hc_swap32_S (h32_from_64_S (key[15]));
   key4[7] = hc_swap32_S (l32_from_64_S (key[15]));
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -195,9 +195,9 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key6[6] = hc_swap32_S (h32_from_64_S (key[23]));
   key6[7] = hc_swap32_S (l32_from_64_S (key[23]));
 
-  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_twofish_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_serpent_camellia (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6) == 1) return 0;
+  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl
index dbb75abcd..b729f07a5 100644
--- a/OpenCL/m13731-pure.cl
+++ b/OpenCL/m13731-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl
index 4857bdc91..057efa707 100644
--- a/OpenCL/m13732-pure.cl
+++ b/OpenCL/m13732-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = hc_swap32_S (key[30]);
   key4[7] = hc_swap32_S (key[31]);
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl
index c7cce43af..79c8767cd 100644
--- a/OpenCL/m13733-pure.cl
+++ b/OpenCL/m13733-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = hc_swap32_S (key[30]);
   key4[7] = hc_swap32_S (key[31]);
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -195,9 +195,9 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key6[6] = hc_swap32_S (key[46]);
   key6[7] = hc_swap32_S (key[47]);
 
-  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_twofish_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_serpent_camellia (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6) == 1) return 0;
+  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl
index 8ba26eeb7..405d5c277 100644
--- a/OpenCL/m13751-pure.cl
+++ b/OpenCL/m13751-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl
index 48d7f5c99..fbc4c5574 100644
--- a/OpenCL/m13752-pure.cl
+++ b/OpenCL/m13752-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = hc_swap32_S (key[30]);
   key4[7] = hc_swap32_S (key[31]);
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl
index 21fddcae8..9e7fd752d 100644
--- a/OpenCL/m13753-pure.cl
+++ b/OpenCL/m13753-pure.cl
@@ -77,11 +77,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key2[6] = hc_swap32_S (key[14]);
   key2[7] = hc_swap32_S (key[15]);
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -126,13 +126,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key4[6] = hc_swap32_S (key[30]);
   key4[7] = hc_swap32_S (key[31]);
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -195,9 +195,9 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
   key6[6] = hc_swap32_S (key[46]);
   key6[7] = hc_swap32_S (key[47]);
 
-  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_twofish_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_serpent_camellia (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6) == 1) return 0;
+  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl
index 7bbbb6a55..821ab8141 100644
--- a/OpenCL/m13771-pure.cl
+++ b/OpenCL/m13771-pure.cl
@@ -80,11 +80,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[0]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[0]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl
index 467ceba89..7e8ef5b83 100644
--- a/OpenCL/m13772-pure.cl
+++ b/OpenCL/m13772-pure.cl
@@ -80,11 +80,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[0]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[0]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -129,13 +129,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key4[6] = hc_swap32_S (h32_from_64_S (key[ 8]));
   key4[7] = hc_swap32_S (l32_from_64_S (key[ 8]));
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl
index 9d888bfc1..1fad87e9e 100644
--- a/OpenCL/m13773-pure.cl
+++ b/OpenCL/m13773-pure.cl
@@ -80,11 +80,11 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key2[6] = hc_swap32_S (h32_from_64_S (key[0]));
   key2[7] = hc_swap32_S (l32_from_64_S (key[0]));
 
-  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_twofish    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_camellia   (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
   if (verify_header_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2) == 1) return 0;
+  if (verify_header_aes        (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -129,13 +129,13 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key4[6] = hc_swap32_S (h32_from_64_S (key[ 8]));
   key4[7] = hc_swap32_S (l32_from_64_S (key[ 8]));
 
-  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_twofish_serpent     (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_aes_twofish         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_camellia_kuznyechik (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
   if (verify_header_camellia_serpent    (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
-  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_twofish  (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4) == 1) return 0;
+  if (verify_header_kuznyechik_aes      (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
@@ -198,9 +198,9 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u64
   key6[6] = hc_swap32_S (h32_from_64_S (key[16]));
   key6[7] = hc_swap32_S (l32_from_64_S (key[16]));
 
-  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_serpent_twofish_aes         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
   if (verify_header_kuznyechik_serpent_camellia (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6) == 1) return 0;
+  if (verify_header_aes_twofish_serpent         (esalt_bufs[0].data_buf, esalt_bufs[0].signature, key1, key2, key3, key4, key5, key6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) return 0;
 
   return -1;
 }
diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl
index 6257b4c42..247191796 100644
--- a/OpenCL/m20011-pure.cl
+++ b/OpenCL/m20011-pure.cl
@@ -405,14 +405,6 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -428,4 +420,12 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
     }
   }
+
+  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl
index 7bf0534a6..823cec6e7 100644
--- a/OpenCL/m20012-pure.cl
+++ b/OpenCL/m20012-pure.cl
@@ -405,14 +405,6 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -429,6 +421,14 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     }
   }
 
+  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
@@ -451,14 +451,6 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15]));
   ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15]));
 
-  if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -474,4 +466,12 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
     }
   }
+
+  if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
 }
diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl
index 5e6d04dc7..de9a55c45 100644
--- a/OpenCL/m20013-pure.cl
+++ b/OpenCL/m20013-pure.cl
@@ -405,14 +405,6 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7]));
   ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7]));
 
-  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -429,6 +421,14 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     }
   }
 
+  if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
+
   u32 ukey3[8];
 
   ukey3[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
@@ -451,14 +451,6 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15]));
   ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15]));
 
-  if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -475,6 +467,14 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
     }
   }
 
+  if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
+
   u32 ukey5[8];
 
   ukey5[0] = hc_swap32_S (h32_from_64_S (tmps[gid].out[16]));
@@ -497,14 +497,6 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
   ukey6[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[23]));
   ukey6[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[23]));
 
-  if (dcrp_verify_header_aes_twofish_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
-  {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
-    {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
-    }
-  }
-
   if (dcrp_verify_header_serpent_twofish_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
@@ -512,4 +504,12 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt
       mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
     }
   }
+
+  if (dcrp_verify_header_aes_twofish_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0);
+    }
+  }
 }

From 51ddf523694e14e8701974d93551e42fa531cbbc Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 13 May 2019 16:23:28 +0200
Subject: [PATCH 69/73] Initialize CUDA vector datatypes to zero

---
 OpenCL/inc_types.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index a1bb0247b..dca2a3b5a 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -70,7 +70,7 @@ struct __device_builtin__ __builtin_align__(2) u8x
   inline __device__  u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
   inline __device__  u8x (const u8 a)             : s0(a), s1(a) { }
 
-  inline __device__  u8x (void) { }
+  inline __device__  u8x (void) : s0(0), s1(0) { }
   inline __device__ ~u8x (void) { }
 };
 
@@ -82,7 +82,7 @@ struct __device_builtin__ __builtin_align__(4) u16x
   inline __device__  u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
   inline __device__  u16x (const u16 a)              : s0(a), s1(a) { }
 
-  inline __device__  u16x (void) { }
+  inline __device__  u16x (void) : s0(0), s1(0) { }
   inline __device__ ~u16x (void) { }
 };
 
@@ -94,7 +94,7 @@ struct __device_builtin__ __builtin_align__(8) u32x
   inline __device__  u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
   inline __device__  u32x (const u32 a)              : s0(a), s1(a) { }
 
-  inline __device__  u32x (void) { }
+  inline __device__  u32x (void) : s0(0), s1(0) { }
   inline __device__ ~u32x (void) { }
 };
 
@@ -106,7 +106,7 @@ struct __device_builtin__ __builtin_align__(16) u64x
   inline __device__  u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
   inline __device__  u64x (const u64 a)              : s0(a), s1(a) { }
 
-  inline __device__  u64x (void) { }
+  inline __device__  u64x (void) : s0(0), s1(0) { }
   inline __device__ ~u64x (void) { }
 };
 
@@ -230,7 +230,7 @@ struct __device_builtin__ __builtin_align__(4) u8x
   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
   inline __device__  u8x (const u8 a)                                     : s0(a), s1(a), s2(a), s3(a) { }
 
-  inline __device__  u8x (void) { }
+  inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0) { }
   inline __device__ ~u8x (void) { }
 };
 
@@ -244,7 +244,7 @@ struct __device_builtin__ __builtin_align__(8) u16x
   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
   inline __device__  u16x (const u16 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
 
-  inline __device__  u16x (void) { }
+  inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0) { }
   inline __device__ ~u16x (void) { }
 };
 
@@ -258,7 +258,7 @@ struct __device_builtin__ __builtin_align__(16) u32x
   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
   inline __device__  u32x (const u32 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
 
-  inline __device__  u32x (void) { }
+  inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0) { }
   inline __device__ ~u32x (void) { }
 };
 
@@ -272,7 +272,7 @@ struct __device_builtin__ __builtin_align__(32) u64x
   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
   inline __device__  u64x (const u64 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
 
-  inline __device__  u64x (void) { }
+  inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0) { }
   inline __device__ ~u64x (void) { }
 };
 
@@ -400,7 +400,7 @@ struct __device_builtin__ __builtin_align__(8) u8x
   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
   inline __device__  u8x (const u8 a)                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
 
-  inline __device__  u8x (void) { }
+  inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
   inline __device__ ~u8x (void) { }
 };
 
@@ -418,7 +418,7 @@ struct __device_builtin__ __builtin_align__(16) u16x
   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
   inline __device__  u16x (const u16 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
 
-  inline __device__  u16x (void) { }
+  inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
   inline __device__ ~u16x (void) { }
 };
 
@@ -436,7 +436,7 @@ struct __device_builtin__ __builtin_align__(32) u32x
   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
   inline __device__  u32x (const u32 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
 
-  inline __device__  u32x (void) { }
+  inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
   inline __device__ ~u32x (void) { }
 };
 
@@ -454,7 +454,7 @@ struct __device_builtin__ __builtin_align__(64) u64x
   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
   inline __device__  u64x (const u64 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
 
-  inline __device__  u64x (void) { }
+  inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
   inline __device__ ~u64x (void) { }
 };
 
@@ -590,7 +590,7 @@ struct __device_builtin__ __builtin_align__(16) u8x
   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
   inline __device__  u8x (const u8 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
 
-  inline __device__  u8x (void) { }
+  inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
   inline __device__ ~u8x (void) { }
 };
 
@@ -616,7 +616,7 @@ struct __device_builtin__ __builtin_align__(32) u16x
   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
   inline __device__  u16x (const u16 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
 
-  inline __device__  u16x (void) { }
+  inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
   inline __device__ ~u16x (void) { }
 };
 
@@ -642,7 +642,7 @@ struct __device_builtin__ __builtin_align__(64) u32x
   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
   inline __device__  u32x (const u32 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
 
-  inline __device__  u32x (void) { }
+  inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
   inline __device__ ~u32x (void) { }
 };
 
@@ -668,7 +668,7 @@ struct __device_builtin__ __builtin_align__(128) u64x
   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
   inline __device__  u64x (const u64 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
 
-  inline __device__  u64x (void) { }
+  inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
   inline __device__ ~u64x (void) { }
 };
 

From 1943c35e4ac36d46e3aaba7f640f7717cd6e9ce9 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 13 May 2019 17:25:33 +0200
Subject: [PATCH 70/73] Improve kernel-loops detection for slow hashes

---
 src/autotune.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/autotune.c b/src/autotune.c
index 11dc8c1d1..42f2b13de 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -201,7 +201,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   if (1)
   {
-    const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
+    double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
 
     if (exec_msec > 2000)
     {
@@ -210,6 +210,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       return -1;
     }
 
+    exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
+
     const u32 mm = kernel_loops_max / kernel_loops_min;
 
     if ((exec_msec * mm) > target_msec)

From bca03bb7edd46b7c1c1a68bd13972d41c7339eae Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 14 May 2019 10:09:46 +0200
Subject: [PATCH 71/73] CUDA offers a nice way to query available device
 memory, no need to brute force

---
 src/backend.c | 94 +++++++++++++++++----------------------------------
 1 file changed, 31 insertions(+), 63 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index fdeb12512..0514ec5a2 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1517,6 +1517,32 @@ int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t
   return 0;
 }
 
+int hc_cuMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) // query device memory through cuMemGetInfo(); returns 0 on success, -1 (after logging) on failure
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
+  CUDA_PTR *cuda = backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuMemGetInfo (free, total); // results are written through the caller's free/total pointers
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) // prefer the readable message when it can be looked up
+    {
+      event_log_error (hashcat_ctx, "cuMemGetInfo(): %s", pStr);
+    }
+    else // lookup failed: fall back to logging the numeric CUresult
+    {
+      event_log_error (hashcat_ctx, "cuMemGetInfo(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
 
 int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc)
 {
@@ -5555,72 +5581,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       // device_available_mem
 
-      #define MAX_ALLOC_CHECKS_CNT  8192
-      #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+      size_t free  = 0;
+      size_t total = 0;
 
-      device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+      const int rc_cuMemGetInfo = hc_cuMemGetInfo (hashcat_ctx, &free, &total);
 
-      // OK, so the problem here is the following:
-      // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
-      // but there's no way to ask for available memory on the device.
-      // In combination, most OpenCL runtimes implementation of clCreateBuffer()
-      // are doing so called lazy memory allocation on the device.
-      // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
-      // running on the host we end up with an error type of this:
-      // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
-      // The clEnqueueNDRangeKernel() is because of the lazy allocation
-      // The best way to workaround this problem is if we would be able to ask for available memory,
-      // The idea here is to try to evaluate available memory by allocating it till it errors
+      if (rc_cuMemGetInfo == -1) return -1;
 
-      CUdeviceptr *tmp_device = (CUdeviceptr *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (CUdeviceptr));
-
-      u64 c;
-
-      for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-      {
-        if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
-
-        CUresult rc_tmp;
-
-        CUDA_PTR *cuda = backend_ctx->cuda;
-
-        rc_tmp = cuda->cuMemAlloc (&tmp_device[c], MAX_ALLOC_CHECKS_SIZE);
-
-        if (rc_tmp != CUDA_SUCCESS)
-        {
-          c--;
-
-          break;
-        }
-
-        char tmp_host[8];
-
-        rc_tmp = cuda->cuMemcpyDtoH (tmp_host, tmp_device[c], sizeof (tmp_host));
-
-        if (rc_tmp != CUDA_SUCCESS) break;
-
-        rc_tmp = cuda->cuMemcpyHtoD (tmp_device[c], tmp_host, sizeof (tmp_host));
-
-        if (rc_tmp != CUDA_SUCCESS) break;
-      }
-
-      device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
-
-      // clean up
-
-      for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-      {
-        if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
-
-        if (tmp_device[c] != 0)
-        {
-          const int rc_cuMemFree = hc_cuMemFree (hashcat_ctx, tmp_device[c]);
-
-          if (rc_cuMemFree == -1) return -1;
-        }
-      }
-
-      hcfree (tmp_device);
+      device_param->device_available_mem = (u64) free;
 
       const int rc_cuCtxDestroy = hc_cuCtxDestroy (hashcat_ctx, cuda_context);
 

From 523e0f71515a61f866f90487938cec3dd173a1f3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 14 May 2019 10:25:49 +0200
Subject: [PATCH 72/73] Fix freeing of unallocated memory in case OpenCL
 initialization failed

---
 src/backend.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 0514ec5a2..5ad504460 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5255,20 +5255,23 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
 
   if (backend_ctx->enabled == false) return;
 
+  hcfree (backend_ctx->devices_param);
+
+  if (backend_ctx->ocl)
+  {
+    hcfree (backend_ctx->opencl_platforms);
+    hcfree (backend_ctx->opencl_platforms_devices);
+    hcfree (backend_ctx->opencl_platforms_devices_cnt);
+    hcfree (backend_ctx->opencl_platforms_name);
+    hcfree (backend_ctx->opencl_platforms_vendor);
+    hcfree (backend_ctx->opencl_platforms_vendor_id);
+    hcfree (backend_ctx->opencl_platforms_version);
+  }
+
   nvrtc_close (hashcat_ctx);
   cuda_close  (hashcat_ctx);
   ocl_close   (hashcat_ctx);
 
-  hcfree (backend_ctx->devices_param);
-
-  hcfree (backend_ctx->opencl_platforms);
-  hcfree (backend_ctx->opencl_platforms_devices);
-  hcfree (backend_ctx->opencl_platforms_devices_cnt);
-  hcfree (backend_ctx->opencl_platforms_name);
-  hcfree (backend_ctx->opencl_platforms_vendor);
-  hcfree (backend_ctx->opencl_platforms_vendor_id);
-  hcfree (backend_ctx->opencl_platforms_version);
-
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 

From f248d5b7911d2eaa9aed0169f6dbbc123a6f0b84 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 14 May 2019 10:59:33 +0200
Subject: [PATCH 73/73] Fix hashcat.hctune entry for Tegra X1

---
 hashcat.hctune | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hashcat.hctune b/hashcat.hctune
index c746e2de1..f49236fce 100644
--- a/hashcat.hctune
+++ b/hashcat.hctune
@@ -228,7 +228,7 @@ TITAN_Xp                                        ALIAS_nv_sm50_or_higher
 TITAN_V                                         ALIAS_nv_sm50_or_higher
 TITAN_RTX                                       ALIAS_nv_sm50_or_higher
 
-Tegra_X1                                        ALIAS_nv_sm50_or_higher
+NVIDIA_Tegra_X1                                 ALIAS_nv_sm50_or_higher
 
 GeForce_910M                                    ALIAS_nv_sm50_or_higher
 GeForce_920M                                    ALIAS_nv_sm50_or_higher