1
mirror of https://github.com/hashcat/hashcat synced 2024-11-24 14:27:14 +01:00

Optimize GCM code to use only u32 data types, make it CUDA compatible and remove some branches

This commit is contained in:
Jens Steube 2021-04-05 17:59:42 +02:00
parent b3a6e2dca6
commit 71a8f97294
7 changed files with 88 additions and 166 deletions

View File

@ -10,104 +10,55 @@
#include "inc_cipher_aes.h"
#include "inc_cipher_aes-gcm.h"
#ifndef AES_GCM_ALT1
DECLSPEC void AES_GCM_shift_right_block(uchar *block)
{
u32 val;
uchar16 *v = (uchar16 *) block;
uint4 *p = (uint4 *) block;
val = hc_swap32_S (p[0].w);
val >>= 1;
if (v[0].sb & 0x01) val |= 0x80000000;
p[0].w = hc_swap32_S (val);
val = hc_swap32_S (p[0].z);
val >>= 1;
if (v[0].s7 & 0x01) val |= 0x80000000;
p[0].z = hc_swap32_S (val);
val = hc_swap32_S (p[0].y);
val >>= 1;
if (v[0].s3 & 0x01) val |= 0x80000000;
p[0].y = hc_swap32_S (val);
val = hc_swap32_S (p[0].x);
val >>= 1;
p[0].x = hc_swap32_S (val);
}
#endif // AES_GCM_ALT1
DECLSPEC void AES_GCM_inc32 (u32 *block)
{
block[3] += 0x00000001;
block[3] += 1;
}
DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src)
{
*dst++ ^= *src++;
*dst++ ^= *src++;
*dst++ ^= *src++;
*dst++ ^= *src++;
dst[0] ^= src[0];
dst[1] ^= src[1];
dst[2] ^= src[2];
dst[3] ^= src[3];
}
DECLSPEC void AES_GCM_gf_mult (const uchar16 *x, const uchar16 *y, uchar16 *z)
DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z)
{
u32 i, j;
z[0] = 0;
z[1] = 0;
z[2] = 0;
z[3] = 0;
uchar16 v = y[0].s32107654ba98fedc;
u32 t[4];
u8 x_char[16] = { x[0].s3, x[0].s2, x[0].s1, x[0].s0, x[0].s7, x[0].s6, x[0].s5, x[0].s4, x[0].sb, x[0].sa, x[0].s9, x[0].s8, x[0].sf, x[0].se, x[0].sd, x[0].sc };
t[0] = y[0];
t[1] = y[1];
t[2] = y[2];
t[3] = y[3];
#ifndef AES_GCM_ALT1
u8 *v_char = (u8 *) &v;
#endif
u32 *i_char = (u32 *) &v;
u8 t = 0;
for (i = 0; i < 16; i++)
for (int i = 0; i < 4; i++)
{
for (j = 0; j < 8; j++)
const u32 tv = x[i];
for (int j = 0; j < 32; j++)
{
if (x_char[i] & 1 << (7 - j))
if ((tv >> (31 - j)) & 1)
{
z[0] ^= v;
z[0] ^= t[0];
z[1] ^= t[1];
z[2] ^= t[2];
z[3] ^= t[3];
}
t = v.sf & 0x01;
const int m = t[3] & 1; // save lost bit
#ifndef AES_GCM_ALT1
t[3] = (t[2] << 31) | (t[3] >> 1);
t[2] = (t[1] << 31) | (t[2] >> 1);
t[1] = (t[0] << 31) | (t[1] >> 1);
t[0] = 0 | (t[0] >> 1);
AES_GCM_shift_right_block(v_char);
#else
i_char[0] = hc_swap32_S (i_char[0]);
i_char[1] = hc_swap32_S (i_char[1]);
i_char[2] = hc_swap32_S (i_char[2]);
i_char[3] = hc_swap32_S (i_char[3]);
i_char[3] = (i_char[3] >> 1) | (i_char[2] << 31);
i_char[2] = (i_char[2] >> 1) | (i_char[1] << 31);
i_char[1] = (i_char[1] >> 1) | (i_char[0] << 31);
i_char[0] >>= 1;
i_char[0] = hc_swap32_S (i_char[0]);
i_char[1] = hc_swap32_S (i_char[1]);
i_char[2] = hc_swap32_S (i_char[2]);
i_char[3] = hc_swap32_S (i_char[3]);
#endif // AES_GCM_ALT1
if (t)
{
v.s0 ^= 0xe1;
}
t[0] ^= m * 0xe1000000;
}
}
}
@ -126,12 +77,7 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 *
xpos += 4;
AES_GCM_gf_mult ((uchar16 *) out, (uchar16 *) subkey, (uchar16 *) tmp);
tmp[0] = hc_swap32_S (tmp[0]);
tmp[1] = hc_swap32_S (tmp[1]);
tmp[2] = hc_swap32_S (tmp[2]);
tmp[3] = hc_swap32_S (tmp[3]);
AES_GCM_gf_mult (out, subkey, tmp);
out[0] = tmp[0];
out[1] = tmp[1];
@ -155,7 +101,12 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 *
AES_GCM_xor_block (out, tmp);
AES_GCM_gf_mult ((uchar16 *) out, (uchar16 *) subkey, (uchar16 *) tmp);
AES_GCM_gf_mult (out, subkey, tmp);
tmp[0] = hc_swap32_S (tmp[0]);
tmp[1] = hc_swap32_S (tmp[1]);
tmp[2] = hc_swap32_S (tmp[2]);
tmp[3] = hc_swap32_S (tmp[3]);
out[0] = tmp[0];
out[1] = tmp[1];
@ -202,8 +153,11 @@ DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, u32 iv_len, const u32 *subkey,
J0[2] = iv[2];
J0[3] = iv[3];
u32 len_buf[4] = { 0 };
u32 len_buf[4];
len_buf[0] = 0;
len_buf[1] = 0;
len_buf[2] = 0;
len_buf[3] = iv_len * 8;
AES_GCM_ghash (subkey, len_buf, 16, J0);
@ -213,11 +167,17 @@ DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, u32 iv_len, const u32 *subkey,
DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
const u32 *xpos = in;
u32 *ypos = out;
u32 n = in_len / 16;
u32 iv_buf[4];
u32 iv_buf[4] = { iv[0], iv[1], iv[2], iv[3] };
iv_buf[0] = iv[0];
iv_buf[1] = iv[1];
iv_buf[2] = iv[2];
iv_buf[3] = iv[3];
const u32 n = in_len / 16;
for (u32 i = 0; i < n; i++)
{
@ -247,20 +207,18 @@ DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, u32 in
DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
u32 J0_incr[4] = {
J0[0],
J0[1],
J0[2],
J0[3],
};
u32 J0_incr[4];
J0_incr[0] = J0[0];
J0_incr[1] = J0[1];
J0_incr[2] = J0[2];
J0_incr[3] = J0[3];
AES_GCM_gctr (key, J0_incr, in, in_len, out, s_te0, s_te1, s_te2, s_te3, s_te4);
}
DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, u32 *enc_buf, u32 enc_len, u32 *out)
{
u32 len_buf[4] = { 0 };
out[0] = 0;
out[1] = 0;
out[2] = 0;
@ -283,7 +241,11 @@ DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len,
out[2] = hc_swap32_S (out[2]);
out[3] = hc_swap32_S (out[3]);
u32 len_buf[4];
len_buf[0] = aad_len * 8;
len_buf[1] = 0;
len_buf[2] = 0;
len_buf[3] = enc_len * 8;
AES_GCM_ghash (subkey, len_buf, 16, out);

View File

@ -6,13 +6,9 @@
#ifndef _INC_CIPHER_AES_GCM_H
#define _INC_CIPHER_AES_GCM_H
#ifndef AES_GCM_ALT1
DECLSPEC void AES_GCM_shift_right_block(uchar *block);
#endif
DECLSPEC void AES_GCM_inc32 (u32 *block);
DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src);
DECLSPEC void AES_GCM_gf_mult (const uchar16 *x, const uchar16 *y, uchar16 *z);
DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z);
DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 *out);
DECLSPEC void AES_GCM_Init (const u32 *ukey, u32 key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4);
DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, u32 iv_len, const u32 *subkey, u32 *J0);

View File

@ -82,7 +82,7 @@ DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *i
sha256_transform_vector (w0, w1, w2, w3, digest);
}
KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
/**
* base
@ -166,7 +166,7 @@ KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
}
}
KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
const u64 gid = get_global_id (0);
@ -272,15 +272,11 @@ KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
}
}
KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
@ -288,9 +284,6 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
#ifdef REAL_SHM
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
@ -387,36 +380,21 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
S[2] ^= enc[2];
S[3] ^= enc[3];
AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t);
t[0] = hc_swap32_S (t[0]);
t[1] = hc_swap32_S (t[1]);
t[2] = hc_swap32_S (t[2]);
t[3] = hc_swap32_S (t[3]);
AES_GCM_gf_mult (S, subKey, t);
S[0] = t[0] ^ enc[4];
S[1] = t[1] ^ enc[5];
S[2] = t[2] ^ enc[6];
S[3] = t[3] ^ enc[7];
AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t);
t[0] = hc_swap32_S (t[0]);
t[1] = hc_swap32_S (t[1]);
t[2] = hc_swap32_S (t[2]);
t[3] = hc_swap32_S (t[3]);
AES_GCM_gf_mult (S, subKey, t);
S[0] = t[0] ^ enc[8];
S[1] = t[1] ^ enc[9];
S[2] = t[2] ^ enc[10];
S[3] = t[3] ^ enc[11];
AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t);
t[0] = hc_swap32_S (t[0]);
t[1] = hc_swap32_S (t[1]);
t[2] = hc_swap32_S (t[2]);
t[3] = hc_swap32_S (t[3]);
AES_GCM_gf_mult (S, subKey, t);
S[0] = t[0];
S[1] = t[1];
@ -433,12 +411,12 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
S[2] ^= t[2];
S[3] ^= t[3];
AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t);
AES_GCM_gf_mult (S, subKey, t);
S[0] = hc_swap32_S (t[0]);
S[1] = hc_swap32_S (t[1]);
S[2] = hc_swap32_S (t[2]);
S[3] = hc_swap32_S (t[3]);
S[0] = t[0];
S[1] = t[1];
S[2] = t[2];
S[3] = t[3];
u32 len_buf[4] = { 0 };
@ -450,12 +428,12 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
S[2] ^= len_buf[2];
S[3] ^= len_buf[3];
AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t);
AES_GCM_gf_mult (S, subKey, t);
S[0] = hc_swap32_S (t[0]);
S[1] = hc_swap32_S (t[1]);
S[2] = hc_swap32_S (t[2]);
S[3] = hc_swap32_S (t[3]);
S[0] = t[0];
S[1] = t[1];
S[2] = t[2];
S[3] = t[3];
J0[3] = 0x00000001;

View File

@ -82,7 +82,7 @@ DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *i
sha256_transform_vector (w0, w1, w2, w3, digest);
}
KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
/**
* base
@ -166,7 +166,7 @@ KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
}
}
KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
const u64 gid = get_global_id (0);
@ -272,15 +272,11 @@ KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
}
}
KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* aes shared
@ -288,9 +284,6 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
#ifdef REAL_SHM
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
LOCAL_VK u32 s_te0[256];
LOCAL_VK u32 s_te1[256];
LOCAL_VK u32 s_te2[256];
@ -376,12 +369,12 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len;
/*
/*
// decrypt buffer is not useful here, skip
u32 dec[14] = { 0 };
AES_GCM_GCTR (key, J0, enc, enc_len, dec, s_te0, s_te1, s_te2, s_te3, s_te4);
*/
*/
u32 T[4] = { 0 };
u32 S[4] = { 0 };

View File

@ -16,10 +16,10 @@
- Added hash-mode: RAR3-p (Uncompressed)
- Added hash-mode: RSA/DSA/EC/OPENSSH Private Keys
- Added hash-mode: SQLCipher
- Added hash-mode: Stargazer Stellar Wallet XLM
- Added hash-mode: Stuffit5
- Added hash-mode: Umbraco HMAC-SHA1
- Added hash-mode: sha1(sha1($pass).$salt)
- Added hash-mode: Stargazer Stellar Wallet XLM, PBKDF2-HMAC-SHA256 + AES-256-GCM
##
## Features

View File

@ -292,6 +292,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- Blockchain, My Wallet
- Blockchain, My Wallet, V2
- Blockchain, My Wallet, Second Password (SHA256)
- Stargazer Stellar Wallet XLM
- Ethereum Pre-Sale Wallet, PBKDF2-HMAC-SHA256
- Ethereum Wallet, PBKDF2-HMAC-SHA256
- Ethereum Wallet, SCRYPT
@ -340,7 +341,6 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- Django (SHA-1)
- Web2py pbkdf2-sha512
- TOTP (HMAC-SHA1)
- Stargazer Stellar Wallet XLM, PBKDF2-HMAC-SHA256 + AES-256-GCM
- Dahua Authentication MD5
##

View File

@ -18,8 +18,8 @@ static const u32 DGST_POS2 = 2;
static const u32 DGST_POS3 = 3;
static const u32 DGST_SIZE = DGST_SIZE_4_4;
static const u32 HASH_CATEGORY = HASH_CATEGORY_PASSWORD_MANAGER;
static const char *HASH_NAME = "Stargazer Stellar Wallet XLM, PBKDF2-HMAC-SHA256 + AES-256-GCM";
static const u64 KERN_TYPE = 27000;
static const char *HASH_NAME = "Stargazer Stellar Wallet XLM";
static const u64 KERN_TYPE = 25500;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
@ -76,13 +76,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
return jit_build_options;
}
// NVIDIA GPU
if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
{
// aes expandkey produce wrong results with this kernel if REAL_SHM is enabled
hc_asprintf (&jit_build_options, "-D _unroll -D FORCE_DISABLE_SHM");
}
// ROCM
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
{