From 829d49c8baffcd53200f0b595bace2dfe492af85 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 6 Feb 2022 01:17:19 +0100
Subject: [PATCH] resync src/backend.c

---
 src/backend.c | 1097 +++++++++++++++++++++++++++++--------------------
 1 file changed, 652 insertions(+), 445 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 3b7c9b27c..c8f3ae862 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5625,6 +5625,383 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->hip_devices_cnt     = hip_devices_cnt;
   backend_ctx->hip_devices_active  = hip_devices_active;
 
+  // Metal
+
+  int metal_devices_cnt    = 0;
+  int metal_devices_active = 0;
+
+  #if defined (__APPLE__)
+  if (backend_ctx->mtl)
+  {
+    // device count
+
+    if (hc_mtlDeviceGetCount (hashcat_ctx, &metal_devices_cnt) == -1)
+    {
+      mtl_close (hashcat_ctx);
+    }
+
+    backend_ctx->metal_devices_cnt = metal_devices_cnt;
+
+    // device specific
+
+    for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++, backend_devices_idx++)
+    {
+      const u32 device_id = backend_devices_idx;
+
+      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+
+      device_param->device_id = device_id;
+
+      backend_ctx->backend_device_from_metal[metal_devices_idx] = backend_devices_idx;
+
+      mtl_device_id metal_device = NULL;
+
+      if (hc_mtlDeviceGet (hashcat_ctx, &metal_device, metal_devices_idx) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->metal_device = metal_device;
+
+      device_param->is_cuda   = false;
+      device_param->is_hip    = false;
+      device_param->is_metal  = true;
+      device_param->is_opencl = false;
+
+      device_param->use_opencl12 = false;
+      device_param->use_opencl20 = false;
+      device_param->use_opencl21 = false;
+
+      device_param->is_apple_silicon = is_apple_silicon();
+
+      // some attributes have to be hardcoded values because they are used for instance in the build options
+
+      device_param->device_local_mem_type     = CL_LOCAL;
+      device_param->opencl_device_type        = CL_DEVICE_TYPE_GPU;
+      device_param->opencl_device_vendor_id   = VENDOR_ID_APPLE;
+      device_param->opencl_platform_vendor_id = VENDOR_ID_APPLE;
+
+      // or in the cached kernel checksum
+
+      device_param->opencl_device_version     = "";
+      device_param->opencl_driver_version     = "";
+
+      // or just to make sure they are not NULL
+
+      device_param->opencl_device_vendor     = strdup ("Apple");
+      device_param->opencl_device_c_version  = "";
+
+      // sm_minor, sm_major
+
+      int mtl_major = 0;
+      int mtl_minor = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_major, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &mtl_minor, MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->mtl_major = mtl_major;
+      device_param->mtl_minor = mtl_minor;
+
+      // device_name
+
+      char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY);
+
+      if (hc_mtlDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        hcfree (device_name);
+        continue;
+      }
+
+      device_param->device_name = device_name;
+
+      hc_string_trim_leading (device_name);
+
+      hc_string_trim_trailing (device_name);
+
+      // device_processors
+
+      int device_processors = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_processors, MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_processors = device_processors;
+
+      // device_host_unified_memory
+
+      int device_host_unified_memory = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_host_unified_memory, MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_host_unified_memory = device_host_unified_memory;
+
+      // device_global_mem, device_available_mem
+
+      size_t bytes = 0;
+
+      if (hc_mtlDeviceTotalMem (hashcat_ctx, &bytes, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_global_mem = (u64) bytes;
+
+      device_param->device_available_mem = 0;
+
+      // device_maxmem_alloc
+
+      size_t device_maxmem_alloc = 0;
+
+      if (hc_mtlDeviceMaxMemAlloc (hashcat_ctx, &device_maxmem_alloc, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_maxmem_alloc = device_maxmem_alloc;
+
+      if (device_host_unified_memory == 1) device_param->device_maxmem_alloc /= 2;
+
+      // warp size
+
+      int metal_warp_size = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &metal_warp_size, MTL_DEVICE_ATTRIBUTE_WARP_SIZE, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->metal_warp_size = metal_warp_size;
+
+      // device_maxworkgroup_size
+
+      int device_maxworkgroup_size = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_maxworkgroup_size = device_maxworkgroup_size;
+
+      // max_clock_frequency
+
+      int device_maxclock_frequency = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, MTL_DEVICE_ATTRIBUTE_CLOCK_RATE, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_maxclock_frequency = device_maxclock_frequency / 1000;
+
+      // pcie_bus, pcie_device, pcie_function
+
+      device_param->pcie_domain   = 0;
+      device_param->pcie_bus      = 0;
+      device_param->pcie_device   = 0;
+      device_param->pcie_function = 0;
+
+      int device_physical_location = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_physical_location, MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_physical_location = device_physical_location;
+
+      int device_location_number = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_location_number, MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_location_number = device_location_number;
+
+      int device_max_transfer_rate = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_transfer_rate, MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_max_transfer_rate = device_max_transfer_rate;
+
+      int device_registryID = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_registryID, MTL_DEVICE_ATTRIBUTE_REGISTRY_ID, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_registryID = device_registryID;
+
+      // kernel_exec_timeout
+
+      device_param->kernel_exec_timeout = 0;
+
+      // wgs_multiple
+
+      device_param->kernel_preferred_wgs_multiple = metal_warp_size;
+
+      // max_shared_memory_per_block
+
+      int max_shared_memory_per_block = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (max_shared_memory_per_block < 32768)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", device_id + 1);
+
+        device_param->skipped = true;
+      }
+
+      device_param->device_local_mem_size = max_shared_memory_per_block;
+
+      // device_max_constant_buffer_size
+
+      int device_max_constant_buffer_size = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, MTL_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      if (device_max_constant_buffer_size < 65536)
+      {
+        event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1);
+
+        device_param->skipped = true;
+      }
+
+      // gpu properties
+
+      int device_is_headless = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_headless, MTL_DEVICE_ATTRIBUTE_HEADLESS, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_is_headless = device_is_headless;
+
+      int device_is_low_power = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_low_power, MTL_DEVICE_ATTRIBUTE_LOW_POWER, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_is_low_power = device_is_low_power;
+
+      int device_is_removable = 0;
+
+      if (hc_mtlDeviceGetAttribute (hashcat_ctx, &device_is_removable, MTL_DEVICE_ATTRIBUTE_REMOVABLE, metal_device) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      device_param->device_is_removable = device_is_removable;
+
+      // skipped
+
+      if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0)
+      {
+        device_param->skipped = true;
+      }
+
+      if ((backend_ctx->opencl_device_types_filter & CL_DEVICE_TYPE_GPU) == 0)
+      {
+        device_param->skipped = true;
+      }
+
+      if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_APPLE))
+      {
+        need_iokit = true;
+      }
+
+      // CPU burning loop damper
+      // Value is given as number between 0-100
+      // By default 8%
+      // in theory not needed with Metal
+
+      device_param->spin_damp = 0;
+
+      // common driver check
+      /*
+      if (device_param->skipped == false)
+      {
+        if ((user_options->force == false) && (user_options->backend_info == false))
+        {
+        }
+
+        // activate device moved below, at end
+      }*/
+
+      // instruction set
+
+      device_param->has_add   = false;
+      device_param->has_addc  = false;
+      device_param->has_sub   = false;
+      device_param->has_subc  = false;
+      device_param->has_bfe   = false;
+      device_param->has_lop3  = false;
+      device_param->has_mov64 = false;
+      device_param->has_prmt  = false;
+
+      // check if we need skip device
+
+      if (device_param->device_processors == 1) device_param->skipped = true;
+
+      /**
+       * activate device
+       */
+
+      if (device_param->skipped == false) metal_devices_active++;
+    }
+  }
+  #endif // __APPLE__
+
+  backend_ctx->metal_devices_cnt     = metal_devices_cnt;
+  backend_ctx->metal_devices_active  = metal_devices_active;
+
   // OCL
 
   int opencl_devices_cnt    = 0;
@@ -5669,6 +6046,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         device_param->is_cuda   = false;
         device_param->is_hip    = false;
+        device_param->is_metal  = false;
         device_param->is_opencl = true;
 
         // store opencl platform i
@@ -5727,7 +6105,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         if (CHECK_BOARD_NAME_AMD)
         {
-          backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+          //backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
           OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
 
@@ -6267,9 +6645,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           {
             event_log_error (hashcat_ctx, "* Device #%u: Unstable OpenCL driver detected!", device_id + 1);
 
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives.");
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors.");
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL);
+            if (user_options->quiet == false)
+            {
+              event_log_warning (hashcat_ctx, "This OpenCL driver may fail kernel compilation or produce false negatives.");
+              event_log_warning (hashcat_ctx, "You can use --force to override, but do not report related errors.");
+              event_log_warning (hashcat_ctx, NULL);
+            }
 
             device_param->skipped = true;
           }
@@ -6324,12 +6705,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           //if (user_options->force == false)
           if (device_param->skipped == false)
           {
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1);
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             You have been warned.");
-            //if (user_options->quiet == false) event_log_warning (hashcat_ctx, "  There are many reports of false negatives and other issues.");
-            //if (user_options->quiet == false) event_log_warning (hashcat_ctx, "  This is not a hashcat issue. Other projects report issues with these drivers.");
-            //if (user_options->quiet == false) event_log_warning (hashcat_ctx, "  You can use --force to override, but do not report related errors. You have been warned.");
-            if (user_options->quiet == false) event_log_warning (hashcat_ctx, NULL);
+            if (user_options->quiet == false)
+            {
+              event_log_warning (hashcat_ctx, "* Device #%u: Apple's OpenCL drivers (GPU) are known to be unreliable.", device_id + 1);
+              event_log_warning (hashcat_ctx, "             You have been warned.");
+              //event_log_warning (hashcat_ctx, "  There are many reports of false negatives and other issues.");
+              //event_log_warning (hashcat_ctx, "  This is not a hashcat issue. Other projects report issues with these drivers.");
+              //event_log_warning (hashcat_ctx, "  You can use --force to override, but do not report related errors. You have been warned.");
+              event_log_warning (hashcat_ctx, NULL);
+            }
 
             //device_param->skipped = true;
           }
@@ -6720,16 +7104,22 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
                 if (device_param->sm_major < 5)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
+                  if (user_options->quiet == false)
+                  {
+                    event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated CUDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor);
+                    event_log_warning (hashcat_ctx, "             For modern OpenCL performance, upgrade to hardware that supports");
+                    event_log_warning (hashcat_ctx, "             CUDA compute capability version 5.0 (Maxwell) or higher.");
+                  }
                 }
 
                 if (device_param->kernel_exec_timeout != 0)
                 {
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
-                  if (user_options->quiet == false) event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
+                  if (user_options->quiet == false)
+                  {
+                    event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1);
+                    event_log_warning (hashcat_ctx, "             This may cause \"CL_OUT_OF_RESOURCES\" or related errors.");
+                    event_log_warning (hashcat_ctx, "             To disable the timeout, see: https://hashcat.net/q/timeoutpatch");
+                  }
                 }
               }
             }
@@ -6750,8 +7140,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // all devices combined go into backend_* variables
 
-  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + hip_devices_cnt    + opencl_devices_cnt;
-  backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active;
+  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + hip_devices_cnt    + metal_devices_cnt    + opencl_devices_cnt;
+  backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + metal_devices_active + opencl_devices_active;
 
   // find duplicate devices
 
@@ -6881,6 +7271,108 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
     }
   }
 
+  #if defined (__APPLE__)
+  if (backend_ctx->mtl)
+  {
+    for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
+    {
+      hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
+
+      if (device_param->is_metal == false) continue;
+
+      if (user_options->backend_info == false)
+      {
+        // do not ignore in case -I because user expects a value also for skipped devices
+
+        if (device_param->skipped == true) continue;
+      }
+
+      /**
+       * create command-queue
+       */
+
+      mtl_command_queue command_queue;
+
+      if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+
+      // available device memory
+      // This test causes an GPU memory usage spike.
+      // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist.
+      // We will simply not run it if that device was skipped by the user.
+
+      #define MAX_ALLOC_CHECKS_CNT  8192
+      #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+
+      device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+
+      if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
+      {
+        // following the same logic as for OpenCL, explained later
+
+        mtl_mem *tmp_device = (mtl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (mtl_mem));
+
+        u64 c;
+
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+          if (hc_mtlCreateBuffer (hashcat_ctx, device_param->metal_device, MAX_ALLOC_CHECKS_SIZE, NULL, &tmp_device[c]) == -1)
+          {
+            c--;
+
+            break;
+          }
+
+          // transfer only a few byte should be enough to force the runtime to actually allocate the memory
+
+          u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+          if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
+
+          if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
+        }
+
+        device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
+
+        if (c > 0)
+        {
+          device_param->device_available_mem *= c;
+        }
+
+        // clean up
+
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+          if (tmp_device[c] != NULL)
+          {
+            if (hc_mtlReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
+          }
+        }
+
+        hcfree (tmp_device);
+      }
+
+      hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue);
+
+      if (device_param->device_host_unified_memory == 1)
+      {
+        // so, we actually have only half the memory because we need the same buffers on host side
+
+        device_param->device_available_mem /= 2;
+      }
+    }
+  }
+  #endif // __APPLE__
+
   if (backend_ctx->ocl)
   {
     for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
@@ -7072,11 +7564,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           u8 tmp_host[8];
 
           if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
-
           if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
 
           if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
-
           if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
         }
 
@@ -7164,6 +7654,8 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->cuda_devices_active    = 0;
   backend_ctx->hip_devices_cnt        = 0;
   backend_ctx->hip_devices_active     = 0;
+  backend_ctx->metal_devices_cnt      = 0;
+  backend_ctx->metal_devices_active   = 0;
   backend_ctx->opencl_devices_cnt     = 0;
   backend_ctx->opencl_devices_active  = 0;
 
@@ -7419,7 +7911,11 @@ static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx,
   return 0;
 }
 
-static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module)
+#if defined (__APPLE__)
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, mtl_library *metal_library)
+#else
+static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module, MAYBE_UNUSED void *metal_library)
+#endif
 {
   const hashconfig_t    *hashconfig    = hashcat_ctx->hashconfig;
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
@@ -7841,6 +8337,22 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       hcfree (binary);
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      mtl_library metal_lib = NULL;
+
+      if (hc_mtlCreateLibraryWithSource (hashcat_ctx, device_param->metal_device, kernel_sources[0], build_options_buf, folder_config->cpath_real, &metal_lib) == -1) return false;
+
+      *metal_library = metal_lib;
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+    }
+    #endif // __APPLE__
+
     if (device_param->is_opencl == true)
     {
       size_t build_log_size = 0;
@@ -7901,23 +8413,50 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
         hcfree (build_log);
       }
 
-      if (rc_nvrtcCompileProgram == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file);
+      if (CL_rc == -1) return false;
 
-        return false;
+      // workaround opencl issue with Apple Silicon
+
+      if (strncmp (device_param->device_name, "Apple M", 7) != 0)
+      {
+        cl_program t2[1];
+
+        t2[0] = p1;
+
+        cl_program fin;
+
+        if (hc_clLinkProgram (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, NULL, 1, t2, NULL, NULL, &fin) == -1) return false;
+
+        // it seems errors caused by clLinkProgram() do not go into CL_PROGRAM_BUILD
+        // I couldn't find any information on the web explaining how else to retrieve the error messages from the linker
+
+        *opencl_program = fin;
+
+        hc_clReleaseProgram (hashcat_ctx, p1);
       }
 
-      size_t binary_size = 0;
+      if (cache_disable == false)
+      {
+        size_t binary_size;
 
-      if (hc_nvrtcGetPTXSize (hashcat_ctx, program, &binary_size) == -1) return false;
+        if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof (size_t), &binary_size, NULL) == -1) return false;
 
-      char *binary = (char *) hcmalloc (binary_size);
+        char *binary = (char *) hcmalloc (binary_size);
 
-      if (hc_nvrtcGetPTX (hashcat_ctx, program, binary) == -1) return false;
+        if (hc_clGetProgramInfo (hashcat_ctx, *opencl_program, CL_PROGRAM_BINARIES, sizeof (char *), &binary, NULL) == -1) return false;
 
-      if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return false;
+        if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false;
 
+        hcfree (binary);
+      }
+    }
+  }
+  else
+  {
+    if (read_kernel_binary (hashcat_ctx, cached_file, kernel_lengths, kernel_sources) == false) return false;
+
+    if (device_param->is_cuda == true)
+    {
       #define LOG_SIZE 8192
 
       char *mod_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
@@ -7954,82 +8493,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
         mod_cnt++;
       }
 
-      #if defined (WITH_CUBIN)
-
-      char *jit_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
-      char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1);
-
-      int jit_cnt = 6;
-
-      CUjit_option jit_opts[7];
-      void *jit_vals[7];
-
-      jit_opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT;
-      jit_vals[0] = (void *) 0;
-
-      jit_opts[1] = CU_JIT_LOG_VERBOSE;
-      jit_vals[1] = (void *) 1;
-
-      jit_opts[2] = CU_JIT_INFO_LOG_BUFFER;
-      jit_vals[2] = (void *) jit_info_log;
-
-      jit_opts[3] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
-      jit_vals[3] = (void *) LOG_SIZE;
-
-      jit_opts[4] = CU_JIT_ERROR_LOG_BUFFER;
-      jit_vals[4] = (void *) jit_error_log;
-
-      jit_opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
-      jit_vals[5] = (void *) LOG_SIZE;
-
-      if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT)
-      {
-        jit_opts[6] = CU_JIT_MAX_REGISTERS;
-        jit_vals[6] = (void *) 128;
-
-        jit_cnt++;
-      }
-
-      CUlinkState state;
-
-      if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      void *cubin = NULL;
-
-      size_t cubin_size = 0;
-
-      if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", jit_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      #if defined (DEBUG)
-      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file);
-      event_log_info (hashcat_ctx, "%s", jit_info_log);
-      event_log_info (hashcat_ctx, NULL);
-      #endif
-
-      if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, cubin, mod_cnt, mod_opts, mod_vals) == -1)
+      if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
       {
         event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
         event_log_error (hashcat_ctx, "%s", mod_error_log);
@@ -8044,132 +8508,12 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       event_log_info (hashcat_ctx, NULL);
       #endif
 
-      if (cache_disable == false)
-      {
-        if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false;
-      }
-
-      if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return false;
-
-      hcfree (jit_info_log);
-      hcfree (jit_error_log);
-
-      #else
-
-      if (hc_cuModuleLoadDataEx (hashcat_ctx, cuda_module, binary, mod_cnt, mod_opts, mod_vals) == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
-        event_log_error (hashcat_ctx, "%s", mod_error_log);
-        event_log_error (hashcat_ctx, NULL);
-
-        return false;
-      }
-
-      #if defined (DEBUG)
-      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file);
-      event_log_info (hashcat_ctx, "%s", mod_info_log);
-      event_log_info (hashcat_ctx, NULL);
-      #endif
-
-      if (cache_disable == false)
-      {
-        if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false;
-      }
-
-      #endif
-
       hcfree (mod_info_log);
       hcfree (mod_error_log);
-
-      hcfree (binary);
     }
 
     if (device_param->is_hip == true)
     {
-      hiprtcProgram program;
-
-      if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false;
-
-      char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
-
-      //hiprtc_options[0] = "--restrict";
-      //hiprtc_options[1] = "--device-as-default-execution-space";
-      //hiprtc_options[2] = "--gpu-architecture";
-
-      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
-
-      /* 4.3 linux
-      hiprtc_options[1] = "-I";
-      hiprtc_options[2] = "/opt/rocm/hip/bin/include";
-      hiprtc_options[3] = "-I";
-      hiprtc_options[4] = "/opt/rocm/include";
-      hiprtc_options[5] = "-I";
-      */
-
-      hiprtc_options[1] = "-nocudainc";
-      hiprtc_options[2] = "-nocudalib";
-      hiprtc_options[3] = "";
-      hiprtc_options[4] = "";
-
-      // untested but it should work
-      #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
-      hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", "OpenCL");
-      #else
-      hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real);
-      #endif
-
-      char *hiprtc_options_string = hcstrdup (build_options_buf);
-
-      const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6);
-
-      const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options);
-
-      hcfree (hiprtc_options_string);
-      hcfree (hiprtc_options);
-
-      size_t build_log_size = 0;
-
-      hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size);
-
-      #if defined (DEBUG)
-      if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1))
-      #else
-      if (rc_hiprtcCompileProgram == -1)
-      #endif
-      {
-        char *build_log = (char *) hcmalloc (build_log_size + 1);
-
-        if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1)
-        {
-          hcfree (build_log);
-
-          return false;
-        }
-
-        build_log[build_log_size] = 0;
-
-        puts (build_log);
-
-        hcfree (build_log);
-      }
-
-      if (rc_hiprtcCompileProgram == -1)
-      {
-        event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file);
-
-        return false;
-      }
-
-      size_t binary_size = 0;
-
-      if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false;
-
-      char *binary = (char *) hcmalloc (binary_size);
-
-      if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false;
-
-      if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false;
-
       #define LOG_SIZE 8192
 
       char *mod_info_log  = (char *) hcmalloc (LOG_SIZE + 1);
@@ -8198,7 +8542,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes;
       mod_vals[5] = (void *) LOG_SIZE;
 
-      if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1)
+      if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1)
       {
         event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file);
         event_log_error (hashcat_ctx, "%s", mod_error_log);
@@ -8217,6 +8561,22 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       hcfree (mod_error_log);
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      mtl_library metal_lib = NULL;
+
+      if (hc_mtlCreateLibraryWithFile (hashcat_ctx, device_param->metal_device, cached_file, &metal_lib) == -1) return false;
+
+      *metal_library = metal_lib;
+
+      #if defined (DEBUG)
+      event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful.", device_param->device_id + 1, source_file);
+      event_log_info (hashcat_ctx, NULL);
+      #endif
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false;
@@ -8289,9 +8649,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       if ((unstable_warning == true) && (user_options->force == false))
       {
+        char runtime_name[7];
+
+        memset (runtime_name, 0, sizeof (runtime_name));
+
+        if (device_param->is_cuda   == true) memcpy (runtime_name, "CUDA", 4);
+        if (device_param->is_hip    == true) memcpy (runtime_name, "HIP", 3);
+        #if defined (__APPLE__)
+        if (device_param->is_metal  == true) memcpy (runtime_name, "Metal", 5);
+        #endif
+        if (device_param->is_opencl == true) memcpy (runtime_name, "OpenCL", 6);
+
         event_log_warning (hashcat_ctx, "* Device #%u: Skipping (hash-mode %u)", device_id + 1, hashconfig->hash_mode);
-        event_log_warning (hashcat_ctx, "             This is due to a known CUDA/HIP/OpenCL runtime/driver issue (not a hashcat issue)");
+        event_log_warning (hashcat_ctx, "             This is due to a known %s runtime and/or device driver issue (not a hashcat issue)", runtime_name);
         event_log_warning (hashcat_ctx, "             You can use --force to override, but do not report related errors.");
+        event_log_warning (hashcat_ctx, NULL);
 
         backend_runtime_skip_warnings++;
 
@@ -8379,6 +8751,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             vector_width = 1;
           }
 
+          #if defined (__APPLE__)
+          if (device_param->is_metal == true)
+          {
+            // Metal does not support this query
+
+            vector_width = 1;
+          }
+          #endif
+
           if (device_param->is_opencl == true)
           {
             if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1)
@@ -8404,6 +8785,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             vector_width = 1;
           }
 
+          #if defined (__APPLE__)
+          if (device_param->is_metal == true)
+          {
+            // Metal does not support this query
+
+            vector_width = 1;
+          }
+          #endif
+
           if (device_param->is_opencl == true)
           {
             if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL) == -1)
@@ -8679,6 +9069,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      // set some limits with Metal
+
+      device_param->kernel_threads_max = 128;
+      device_param->kernel_loops_max = 1024;  // autotune go over ...
+    }
+    #endif
+
     /**
      * create context for each device
      */
@@ -8713,6 +9113,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      /**
+       * create command-queue
+       */
+
+      if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
+      {
+        device_param->skipped = true;
+        continue;
+      }
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       /*
@@ -8949,10 +9364,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     }
     else
     {
-      // tested on windows, linux, apple intel, apple silicon
       // when is builded with cygwin and msys, cpath_real doesn't work
       #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
-      build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", "OpenCL");
+      build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=%s ", "OpenCL");
       #else
       build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -D INCLUDE_PATH=\"%s\" ", folder_config->cpath_real);
       #endif
@@ -9016,10 +9430,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
 
-    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u",
+    const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u",
       backend_ctx->comptime,
       backend_ctx->cuda_driver_version,
       backend_ctx->hip_runtimeVersion,
+      backend_ctx->metal_runtimeVersion,
       device_param->is_opencl,
       device_param->opencl_platform_vendor_id,
       device_param->device_name,
@@ -9088,9 +9503,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       char cached_file[256] = { 0 };
 
-      generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file);
+      generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file, device_param->is_metal);
 
-      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared);
+      #if defined (__APPLE__)
+      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, &device_param->metal_library_shared);
+      #else
+      const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared, NULL);
+      #endif
 
       if (rc_load_kernel == false)
       {
@@ -9311,218 +9730,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size;
       }
 
-      if (device_param->is_opencl == true)
-      {
-        // GPU memset
-
-        if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1;
-
-        if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset;
-
-        device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size;
-
-        // GPU bzero
-
-        if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, "gpu_bzero") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1;
-
-        if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero;
-
-        device_param->kernel_preferred_wgs_multiple_bzero = device_param->cuda_warp_size;
-
-        // GPU autotune init
-
-        if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_atinit, device_param->cuda_module_shared, "gpu_atinit") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1;
-
-        if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit;
-
-        device_param->kernel_preferred_wgs_multiple_atinit = device_param->cuda_warp_size;
-
-        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
-        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
-
-        // GPU decompress
-
-        if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_decompress, device_param->cuda_module_shared, "gpu_decompress") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1;
-
-        if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress;
-
-        device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size;
-
-        // GPU utf8 to utf16le conversion
-
-        if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_utf8toutf16le, device_param->cuda_module_shared, "gpu_utf8_to_utf16") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1;
-
-        if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le;
-
-        device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size;
-      }
-
-      if (device_param->is_hip == true)
-      {
-        // GPU memset
-
-        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_memset");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1;
-
-        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset;
-
-        device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size;
-
-        // GPU bzero
-
-        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_bzero");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_wgs_bzero) == -1) return -1;
-
-        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_bzero, &device_param->kernel_local_mem_size_bzero) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_bzero = device_param->device_local_mem_size - device_param->kernel_local_mem_size_bzero;
-
-        device_param->kernel_preferred_wgs_multiple_bzero = device_param->hip_warp_size;
-
-        // GPU autotune init
-
-        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_atinit");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1;
-
-        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_atinit = device_param->device_local_mem_size - device_param->kernel_local_mem_size_atinit;
-
-        device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size;
-
-        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem),   device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
-        // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
-
-        // GPU decompress
-
-        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_decompress");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1;
-
-        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_decompress = device_param->device_local_mem_size - device_param->kernel_local_mem_size_decompress;
-
-        device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size;
-
-        // GPU utf8 to utf16le conversion
-
-        if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1)
-        {
-          event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s create failed.", device_param->device_id + 1, "gpu_utf8_to_utf16");
-
-          backend_kernel_create_warnings++;
-
-          device_param->skipped_warning = true;
-          continue;
-        }
-
-        if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1;
-
-        if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1;
-
-        device_param->kernel_dynamic_local_mem_size_utf8toutf16le = device_param->device_local_mem_size - device_param->kernel_local_mem_size_utf8toutf16le;
-
-        device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size;
-      }
-
       #if defined (__APPLE__)
       if (device_param->is_metal == true)
       {