mirror of
https://github.com/hashcat/hashcat
synced 2025-01-10 17:16:22 +01:00
Apply previous blowfish optimization for -m 3200 also on -m 9000 and -m 18600
This commit is contained in:
parent
a172ab7d8a
commit
5ef67a8ab7
@ -39,6 +39,60 @@ typedef VTYPE(uint, VECT_SIZE) u32x;
|
||||
typedef VTYPE(ulong, VECT_SIZE) u64x;
|
||||
#endif
|
||||
|
||||
// unions
|
||||
|
||||
typedef union vconv32
|
||||
{
|
||||
u64 v32;
|
||||
|
||||
struct
|
||||
{
|
||||
u16 v16a;
|
||||
u16 v16b;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u8 v8a;
|
||||
u8 v8b;
|
||||
u8 v8c;
|
||||
u8 v8d;
|
||||
};
|
||||
|
||||
} vconv32_t;
|
||||
|
||||
typedef union vconv64
|
||||
{
|
||||
u64 v64;
|
||||
|
||||
struct
|
||||
{
|
||||
u32 v32a;
|
||||
u32 v32b;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u16 v16a;
|
||||
u16 v16b;
|
||||
u16 v16c;
|
||||
u16 v16d;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u8 v8a;
|
||||
u8 v8b;
|
||||
u8 v8c;
|
||||
u8 v8d;
|
||||
u8 v8e;
|
||||
u8 v8f;
|
||||
u8 v8g;
|
||||
u8 v8h;
|
||||
};
|
||||
|
||||
} vconv64_t;
|
||||
|
||||
DECLSPEC u32 l32_from_64_S (u64 a);
|
||||
DECLSPEC u32 l32_from_64_S (u64 a)
|
||||
{
|
||||
@ -1426,3 +1480,189 @@ typedef struct keyboard_layout_mapping
|
||||
int dst_len;
|
||||
|
||||
} keyboard_layout_mapping_t;
|
||||
|
||||
// functions
|
||||
|
||||
// Returns the v8a element (first byte in memory order) of a 32-bit word by
// storing the word through vconv32_t.v32 and reading it back as vconv32_t.v8a.

DECLSPEC u8 v8a_from_v32_S (const u32 v32);

DECLSPEC u8 v8a_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v8a;
}
|
||||
|
||||
// Returns the v8b element (second byte in memory order) of a 32-bit word by
// storing the word through vconv32_t.v32 and reading it back as vconv32_t.v8b.

DECLSPEC u8 v8b_from_v32_S (const u32 v32);

DECLSPEC u8 v8b_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v8b;
}
|
||||
|
||||
// Returns the v8c element (third byte in memory order) of a 32-bit word by
// storing the word through vconv32_t.v32 and reading it back as vconv32_t.v8c.

DECLSPEC u8 v8c_from_v32_S (const u32 v32);

DECLSPEC u8 v8c_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v8c;
}
|
||||
|
||||
// Returns the v8d element (fourth byte in memory order) of a 32-bit word by
// storing the word through vconv32_t.v32 and reading it back as vconv32_t.v8d.

DECLSPEC u8 v8d_from_v32_S (const u32 v32);

DECLSPEC u8 v8d_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v8d;
}
|
||||
|
||||
// Returns the v16a element (first 16-bit element in memory order) of a 32-bit
// word by storing it through vconv32_t.v32 and reading back vconv32_t.v16a.

DECLSPEC u16 v16a_from_v32_S (const u32 v32);

DECLSPEC u16 v16a_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v16a;
}
|
||||
|
||||
// Returns the v16b element (second 16-bit element in memory order) of a 32-bit
// word by storing it through vconv32_t.v32 and reading back vconv32_t.v16b.

DECLSPEC u16 v16b_from_v32_S (const u32 v32);

DECLSPEC u16 v16b_from_v32_S (const u32 v32)
{
  const vconv32_t conv = { .v32 = v32 };

  return conv.v16b;
}
|
||||
|
||||
// Builds a 32-bit word from two 16-bit elements: v16a and v16b are stored
// through the 16-bit view of vconv32_t and the result is read back through
// the v32 member.

DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b);

DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b)
{
  vconv32_t conv;

  // the two stores touch disjoint elements, so their order is irrelevant
  conv.v16b = v16b;
  conv.v16a = v16a;

  return conv.v32;
}
|
||||
|
||||
// Returns the v32a element (first 32-bit element in memory order) of a 64-bit
// word by storing it through vconv64_t.v64 and reading back vconv64_t.v32a.

DECLSPEC u32 v32a_from_v64_S (const u64 v64);

DECLSPEC u32 v32a_from_v64_S (const u64 v64)
{
  const vconv64_t conv = { .v64 = v64 };

  return conv.v32a;
}
|
||||
|
||||
// Returns the v32b element (second 32-bit element in memory order) of a 64-bit
// word by storing it through vconv64_t.v64 and reading back vconv64_t.v32b.

DECLSPEC u32 v32b_from_v64_S (const u64 v64);

DECLSPEC u32 v32b_from_v64_S (const u64 v64)
{
  const vconv64_t conv = { .v64 = v64 };

  return conv.v32b;
}
|
||||
|
||||
// Builds a 64-bit word from two 32-bit elements: v32a and v32b are stored
// through the 32-bit view of vconv64_t and the combined value is read back
// through the v64 member.

DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b);

DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b)
{
  vconv64_t conv;

  // the two stores touch disjoint elements, so their order is irrelevant
  conv.v32b = v32b;
  conv.v32a = v32a;

  return conv.v64;
}
|
||||
|
||||
// unpack functions are similar, but always return u32
|
||||
|
||||
// Extracts bits 0..7 of v32 and returns them in a u32.
//
// With IS_NV the extraction is done with bfe.u32 inline assembly, with IS_AMD
// and an available V_BFE_U32 instruction with that instruction, and in every
// other case with a plain shift and mask.

DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32);

DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
{
  u32 r;

  // HAS_VBFE is passed by the host as "-D HAS_VBFE=<0|1>", so it is always
  // defined and has to be tested by value; an #ifdef would also select the
  // assembly path when the instruction probe failed (value 0).

  #if defined IS_NV
  asm ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
  #elif defined IS_AMD && HAS_VBFE == 1
  __asm__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 0) & 0xff;
  #endif

  return r;
}
|
||||
|
||||
// Extracts bits 8..15 of v32 and returns them in a u32.
//
// With IS_NV the extraction is done with bfe.u32 inline assembly, with IS_AMD
// and an available V_BFE_U32 instruction with that instruction, and in every
// other case with a plain shift and mask.

DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32);

DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
{
  u32 r;

  // HAS_VBFE is passed by the host as "-D HAS_VBFE=<0|1>", so it is always
  // defined and has to be tested by value; an #ifdef would also select the
  // assembly path when the instruction probe failed (value 0).

  #if defined IS_NV
  asm ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
  #elif defined IS_AMD && HAS_VBFE == 1
  __asm__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 8) & 0xff;
  #endif

  return r;
}
|
||||
|
||||
// Extracts bits 16..23 of v32 and returns them in a u32.
//
// With IS_NV the extraction is done with bfe.u32 inline assembly, with IS_AMD
// and an available V_BFE_U32 instruction with that instruction, and in every
// other case with a plain shift and mask.

DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32);

DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
{
  u32 r;

  // HAS_VBFE is passed by the host as "-D HAS_VBFE=<0|1>", so it is always
  // defined and has to be tested by value; an #ifdef would also select the
  // assembly path when the instruction probe failed (value 0).

  #if defined IS_NV
  asm ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
  #elif defined IS_AMD && HAS_VBFE == 1
  __asm__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 16) & 0xff;
  #endif

  return r;
}
|
||||
|
||||
// Extracts bits 24..31 of v32 and returns them in a u32.
//
// With IS_NV the extraction is done with bfe.u32 inline assembly, with IS_AMD
// and an available V_BFE_U32 instruction with that instruction, and in every
// other case with a plain shift and mask.

DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32);

DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
{
  u32 r;

  // HAS_VBFE is passed by the host as "-D HAS_VBFE=<0|1>", so it is always
  // defined and has to be tested by value; an #ifdef would also select the
  // assembly path when the instruction probe failed (value 0).

  #if defined IS_NV
  asm ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
  #elif defined IS_AMD && HAS_VBFE == 1
  __asm__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 24) & 0xff;
  #endif

  return r;
}
|
||||
|
@ -299,21 +299,21 @@ __constant u32a c_sbox3[256] =
|
||||
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
|
||||
};
|
||||
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
const u32 r0 = hc_bfe_S ((L), 24, 8); \
|
||||
const u32 r1 = hc_bfe_S ((L), 16, 8); \
|
||||
const u32 r2 = hc_bfe_S ((L), 8, 8); \
|
||||
const u32 r3 = hc_bfe_S ((L), 0, 8); \
|
||||
\
|
||||
tmp = S0[r0]; \
|
||||
tmp += S1[r1]; \
|
||||
tmp ^= S2[r2]; \
|
||||
tmp += S3[r3]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
|
||||
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
|
||||
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
|
||||
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
|
||||
\
|
||||
tmp = S0[r0]; \
|
||||
tmp += S1[r1]; \
|
||||
tmp ^= S2[r2]; \
|
||||
tmp += S3[r3]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
|
||||
#define BF_ENCRYPT(L,R) \
|
||||
@ -658,9 +658,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032
|
||||
L0 = 0;
|
||||
R0 = 0;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (u32 i = 0; i < 9; i++)
|
||||
{
|
||||
BF_ENCRYPT (L0, R0);
|
||||
@ -723,9 +720,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032
|
||||
L0 = 0;
|
||||
R0 = 0;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (u32 i = 0; i < 9; i++)
|
||||
{
|
||||
BF_ENCRYPT (L0, R0);
|
||||
|
@ -308,51 +308,22 @@ __constant u32a c_pbox[18] =
|
||||
0x9216d5d9, 0x8979fb1b
|
||||
};
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
uchar4 c = as_uchar4 ((L)); \
|
||||
\
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[c.s3]; \
|
||||
tmp += S1[c.s2]; \
|
||||
tmp ^= S2[c.s1]; \
|
||||
tmp += S3[c.s0]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
|
||||
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
|
||||
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
|
||||
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
|
||||
\
|
||||
tmp = S0[r0]; \
|
||||
tmp += S1[r1]; \
|
||||
tmp ^= S2[r2]; \
|
||||
tmp += S3[r3]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
|
||||
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
|
||||
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
|
||||
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_GENERIC
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
uchar4 c = as_uchar4 ((L)); \
|
||||
\
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[c.s3]; \
|
||||
tmp += S1[c.s2]; \
|
||||
tmp ^= S2[c.s1]; \
|
||||
tmp += S3[c.s0]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define BF_ENCRYPT(L,R) \
|
||||
{ \
|
||||
@ -512,7 +483,7 @@ DECLSPEC void sha1_transform (const u32 *w0, const u32 *w1, const u32 *w2, const
|
||||
digest[4] += E;
|
||||
}
|
||||
|
||||
__kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -626,10 +597,10 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
P[i] = c_pbox[i];
|
||||
}
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
@ -731,7 +702,7 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -752,23 +723,21 @@ __kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
|
||||
u32 P[18];
|
||||
|
||||
#pragma unroll
|
||||
for (u32 i = 0; i < 18; i++)
|
||||
{
|
||||
P[i] = tmps[gid].P[i];
|
||||
}
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
__local u32 *S2 = S2_all[lid];
|
||||
__local u32 *S3 = S3_all[lid];
|
||||
|
||||
#pragma unroll
|
||||
for (u32 i = 0; i < 256; i++)
|
||||
{
|
||||
S0[i] = tmps[gid].S0[i];
|
||||
|
@ -318,18 +318,21 @@ __constant u32a c_pbox[18] =
|
||||
0x9216d5d9, 0x8979fb1b
|
||||
};
|
||||
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
uchar4 c = as_uchar4 ((L)); \
|
||||
\
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[c.s3]; \
|
||||
tmp += S1[c.s2]; \
|
||||
tmp ^= S2[c.s1]; \
|
||||
tmp += S3[c.s0]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
|
||||
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
|
||||
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
|
||||
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
|
||||
\
|
||||
tmp = S0[r0]; \
|
||||
tmp += S1[r1]; \
|
||||
tmp ^= S2[r2]; \
|
||||
tmp += S3[r3]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
|
||||
#define BF_ENCRYPT(L,R) \
|
||||
@ -582,7 +585,7 @@ __kernel void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
{
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
@ -612,10 +615,10 @@ __kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
P[i] = c_pbox[i] ^ ukey[i % 4];
|
||||
}
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
|
@ -43,7 +43,7 @@
|
||||
## Improvements
|
||||
##
|
||||
|
||||
- Cracking bcrypt: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
|
||||
- Cracking bcrypt and Password Safe v2: Use feedback from the OpenCL runtime to dynamically find the optimal thread count
|
||||
- Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key
|
||||
- Building: Updated BUILD.md
|
||||
- My Wallet: Added additional plaintext pattern used in newer versions
|
||||
|
@ -1317,6 +1317,7 @@ typedef struct hc_device_param
|
||||
|
||||
bool has_vperm;
|
||||
bool has_vadd3;
|
||||
bool has_vbfe;
|
||||
|
||||
double spin_damp;
|
||||
|
||||
|
@ -80,11 +80,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
// based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
|
||||
// which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
|
||||
// therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
|
||||
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
|
||||
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
|
||||
// on my development system. no clue where the 4 bytes are spent.
|
||||
// I did some research on this and it seems to be related with the datatype.
|
||||
// For example, if i used u8 instead, there's only 1 byte wasted.
|
||||
|
||||
u32 fixed_local_size = 0;
|
||||
|
||||
@ -94,7 +89,20 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_local_size = (device_param->device_local_mem_size - 4) / 4096;
|
||||
u32 overhead = 0;
|
||||
|
||||
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
||||
{
|
||||
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
|
||||
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
|
||||
// on my development system. no clue where the 4 bytes are spent.
|
||||
// I did some research on this and it seems to be related with the datatype.
|
||||
// For example, if i used u8 instead, there's only 1 byte wasted.
|
||||
|
||||
overhead = 4;
|
||||
}
|
||||
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
|
@ -70,18 +70,31 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
|
||||
return tmp_size;
|
||||
}
|
||||
|
||||
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
const u32 kernel_threads_min = 8; // Blowfish
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
return kernel_threads_min;
|
||||
}
|
||||
u32 fixed_local_size = 0;
|
||||
|
||||
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_threads_max = 8; // Blowfish
|
||||
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
fixed_local_size = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 overhead = 0;
|
||||
|
||||
return kernel_threads_max;
|
||||
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
||||
{
|
||||
overhead = 4;
|
||||
}
|
||||
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
bool module_outfile_check_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
@ -183,14 +196,14 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
||||
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
|
||||
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kern_type = module_kern_type;
|
||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||
module_ctx->module_opti_type = module_opti_type;
|
||||
|
@ -62,18 +62,31 @@ typedef struct odf11
|
||||
|
||||
static const char *SIGNATURE_ODF = "$odf$";
|
||||
|
||||
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
const u32 kernel_threads_max = 8; // Blowfish enforced
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
return kernel_threads_max;
|
||||
}
|
||||
u32 fixed_local_size = 0;
|
||||
|
||||
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_threads_min = 8; // Blowfish enforced
|
||||
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
fixed_local_size = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 overhead = 0;
|
||||
|
||||
return kernel_threads_min;
|
||||
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
||||
{
|
||||
overhead = 4;
|
||||
}
|
||||
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
@ -335,14 +348,14 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
||||
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
|
||||
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kern_type = module_kern_type;
|
||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||
module_ctx->module_opti_type = module_opti_type;
|
||||
|
@ -3958,6 +3958,10 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
|
||||
|
||||
device_param->has_vadd3 = has_vadd3;
|
||||
|
||||
const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
|
||||
|
||||
device_param->has_vbfe = has_vbfe;
|
||||
}
|
||||
|
||||
// device_available_mem
|
||||
@ -4852,9 +4856,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
// we don't have sm_* on vendors not NV but it doesn't matter
|
||||
|
||||
#if defined (DEBUG)
|
||||
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
|
||||
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
|
||||
#else
|
||||
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
|
||||
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user