mirror of
https://github.com/hashcat/hashcat
synced 2025-01-14 08:17:28 +01:00
Allow OpenCL kernel inline assembly if ROCm drivers were detected
This commit is contained in:
parent
7b71fb803b
commit
51372438fe
@ -2968,7 +2968,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
w0[0] = swap32 (w0[0]);
|
||||
w0[1] = swap32 (w0[1]);
|
||||
w0[2] = swap32 (w0[2]);
|
||||
@ -3327,8 +3327,15 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
||||
w3[3] = swap32 (w3[3]);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -3652,6 +3659,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -32192,7 +32200,7 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
w0[0] = swap32_S (w0[0]);
|
||||
w0[1] = swap32_S (w0[1]);
|
||||
w0[2] = swap32_S (w0[2]);
|
||||
@ -32551,8 +32559,15 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
||||
w3[3] = swap32_S (w3[3]);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
|
@ -272,6 +272,73 @@ static u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
return amd_bytealign (a, b, c);
|
||||
}
|
||||
|
||||
#ifdef IS_AMD_ROCM
|
||||
/**
 * Vector byte permute for AMD ROCm builds.
 *
 * Emits one V_PERM_B32 instruction per vector lane through inline assembly.
 * Note the operand order: the instruction receives (b, a, c), i.e. the first
 * two arguments are deliberately swapped before being handed to the hardware.
 * NOTE(review): presumably this mirrors the argument convention of the NVIDIA
 * intrinsic of the same name so both vendors can share call sites - confirm.
 *
 * The VECT_SIZE tiers below are cumulative: a build with VECT_SIZE == 8 takes
 * the ">= 2", ">= 4" and ">= 8" branches. Each tier therefore handles only the
 * lanes it adds. Because the asm statements are volatile, repeating a lane in
 * a higher tier would emit a redundant instruction that cannot be optimized
 * away.
 *
 * @param a  second source operand passed to V_PERM_B32
 * @param b  first source operand passed to V_PERM_B32
 * @param c  third source operand passed to V_PERM_B32
 *           (presumably the byte selector - confirm against callers)
 * @return   per-lane result of V_PERM_B32
 */
static u32x __byte_perm (const u32x a, const u32x b, const u32x c)
{
  u32x r;

  #if VECT_SIZE == 1
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c));
  #endif

  // lanes 0-1
  #if VECT_SIZE >= 2
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s0) : "v"(b.s0), "v"(a.s0), "v"(c.s0));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s1) : "v"(b.s1), "v"(a.s1), "v"(c.s1));
  #endif

  // lanes 2-3 (0-1 already handled above)
  #if VECT_SIZE >= 4
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s2) : "v"(b.s2), "v"(a.s2), "v"(c.s2));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s3) : "v"(b.s3), "v"(a.s3), "v"(c.s3));
  #endif

  // lanes 4-7
  #if VECT_SIZE >= 8
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s4) : "v"(b.s4), "v"(a.s4), "v"(c.s4));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s5) : "v"(b.s5), "v"(a.s5), "v"(c.s5));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s6) : "v"(b.s6), "v"(a.s6), "v"(c.s6));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s7) : "v"(b.s7), "v"(a.s7), "v"(c.s7));
  #endif

  // lanes 8-15
  #if VECT_SIZE >= 16
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s8) : "v"(b.s8), "v"(a.s8), "v"(c.s8));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s9) : "v"(b.s9), "v"(a.s9), "v"(c.s9));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sa) : "v"(b.sa), "v"(a.sa), "v"(c.sa));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sb) : "v"(b.sb), "v"(a.sb), "v"(c.sb));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sc) : "v"(b.sc), "v"(a.sc), "v"(c.sc));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sd) : "v"(b.sd), "v"(a.sd), "v"(c.sd));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.se) : "v"(b.se), "v"(a.se), "v"(c.se));
  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sf) : "v"(b.sf), "v"(a.sf), "v"(c.sf));
  #endif

  return r;
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD_ROCM
|
||||
/**
 * Scalar byte permute for AMD ROCm builds.
 *
 * Issues a single V_PERM_B32 instruction through inline assembly. The
 * instruction receives its sources in the order (b, a, c): the first two
 * arguments are swapped relative to the function signature.
 *
 * @param a  second source operand passed to V_PERM_B32
 * @param b  first source operand passed to V_PERM_B32
 * @param c  third source operand passed to V_PERM_B32
 *           (presumably the byte selector - confirm against callers)
 * @return   the value V_PERM_B32 writes to its destination operand
 */
static u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
  u32 perm;

  __asm__ volatile ("V_PERM_B32 %0, %1, %2, %3;"
    : "=v"(perm)
    : "v"(b), "v"(a), "v"(c));

  return perm;
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -30,8 +30,13 @@
|
||||
*/
|
||||
|
||||
#if VENDOR_ID == (1 << 0)
|
||||
#if AMD_ROCM == 0
|
||||
#define IS_AMD
|
||||
//#define IS_GENERIC
|
||||
#define IS_AMD_LEGACY
|
||||
#else
|
||||
#define IS_AMD
|
||||
#define IS_AMD_ROCM
|
||||
#endif
|
||||
#elif VENDOR_ID == (1 << 1)
|
||||
#define IS_APPLE
|
||||
#define IS_GENERIC
|
||||
@ -46,7 +51,6 @@
|
||||
#define IS_GENERIC
|
||||
#elif VENDOR_ID == (1 << 5)
|
||||
#define IS_NV
|
||||
//#define IS_GENERIC
|
||||
#elif VENDOR_ID == (1 << 6)
|
||||
#define IS_POCL
|
||||
#define IS_GENERIC
|
||||
|
@ -1029,6 +1029,8 @@ typedef struct hc_device_param
|
||||
char *driver_version;
|
||||
char *device_opencl_version;
|
||||
|
||||
bool is_rocm;
|
||||
|
||||
double nvidia_spin_damp;
|
||||
|
||||
cl_platform_id platform;
|
||||
|
@ -3349,6 +3349,8 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
{
|
||||
// Support for ROCm platform
|
||||
if (atof (device_param->driver_version) >= 1.1) amd_warn = false;
|
||||
|
||||
device_param->is_rocm = true;
|
||||
}
|
||||
#elif defined (_WIN)
|
||||
// AMD Radeon Software 14.9 and higher, should be updated to 15.12
|
||||
@ -4273,9 +4275,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
char build_opts_new[1024] = { 0 };
|
||||
|
||||
#if defined (DEBUG)
|
||||
snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type);
|
||||
snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type);
|
||||
#else
|
||||
snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type);
|
||||
snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type);
|
||||
#endif
|
||||
|
||||
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
|
||||
|
Loading…
Reference in New Issue
Block a user