mirror of
https://github.com/hashcat/hashcat
synced 2024-11-24 14:27:14 +01:00
Optimize SM3 for use on platforms that support native bitselect()
This commit is contained in:
parent
6e45d4dafc
commit
ea6173b307
@ -15,8 +15,13 @@
|
||||
/*
 * SM3 boolean functions, applied bitwise to their three operands.
 *
 *   SM3_FF0 / SM3_GG0  three-way XOR of x, y and z.
 *   SM3_FF1            majority: a result bit is set when at least two of the
 *                      corresponding bits of x, y and z are set.
 *   SM3_GG1            choose: a result bit comes from y where the bit of x is
 *                      set and from z where it is clear.
 *
 * With USE_BITSELECT defined, FF1 and GG1 map onto bitselect (a, b, c), which
 * returns the bit of a where the bit of c is 0 and the bit of b where it is 1:
 *   - majority: where x and z agree (x ^ z == 0) the result is x, otherwise
 *     y decides;
 *   - choose:   x is the selector between z (bit clear) and y (bit set).
 * Without it, equivalent forms built only from AND / OR / XOR are used.
 *
 * SM3_FF1 and SM3_GG1 are deliberately defined only inside the conditional
 * below: an additional unconditional definition with a different body would be
 * an incompatible macro redefinition.
 *
 * Note: arguments may be evaluated more than once; do not pass expressions
 * with side effects.
 */
#define SM3_FF0(x, y, z) ((x) ^ (y) ^ (z))
#define SM3_GG0(x, y, z) ((x) ^ (y) ^ (z))

#ifdef USE_BITSELECT
#define SM3_FF1(x, y, z) (bitselect ((x), (y), ((x) ^ (z))))
#define SM3_GG1(x, y, z) (bitselect ((z), (y), (x)))
#else
#define SM3_FF1(x, y, z) (((x) & (y)) | ((z) & ((x) ^ (y))))
#define SM3_GG1(x, y, z) (((z) ^ ((x) & ((y) ^ (z)))))
#endif
|
||||
|
||||
/*
 * One step of the SM3 message expansion:
 *
 *   P1 (a ^ b ^ rotl (c, 15)) ^ rotl (d, 7) ^ e
 *
 * In this file it is invoked as
 *   SM3_EXPAND_S (w[i - 16], w[i - 9], w[i - 3], w[i - 13], w[i - 6])
 * to produce w[i]. The _S variant uses SM3_P1_S / hc_rotl32_S and is applied
 * to u32 values here; the plain variant uses SM3_P1 / hc_rotl32 and is applied
 * to u32x values. Those helpers are defined outside this section.
 *
 * Every argument is parenthesised so that an argument containing an operator
 * that binds looser than '^' (for example `x | y`) is not regrouped by the
 * expansion.
 */
#define SM3_EXPAND_S(a, b, c, d, e) (SM3_P1_S ((a) ^ (b) ^ hc_rotl32_S ((c), 15)) ^ hc_rotl32_S ((d), 7) ^ (e))
#define SM3_EXPAND(a, b, c, d, e)   (SM3_P1 ((a) ^ (b) ^ hc_rotl32 ((c), 15)) ^ hc_rotl32 ((d), 7) ^ (e))
|
||||
|
@ -263,6 +263,241 @@ DECLSPEC void m31100s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
|
||||
}
|
||||
}
|
||||
|
||||
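/* Disabled alternative m31100s that optimizes the message-expansion phase: the part of the expansion that does not depend on w[0] is computed once into pre_t[], and the w0-dependent contributions (fixNN) are XORed in for each candidate. Kept for reference only: it runs slower than the current implementation, probably because of how the compiler optimizes it.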
|
||||
|
||||
DECLSPEC void m31100s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
|
||||
{
|
||||
const u32 search[4] =
|
||||
{
|
||||
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
|
||||
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
|
||||
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
|
||||
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
const u32 d_rev = hc_rotr32_S (search[0], 9);
|
||||
|
||||
u32 pre_t[68];
|
||||
|
||||
pre_t[ 0] = 0;
|
||||
pre_t[ 1] = w[ 1];
|
||||
pre_t[ 2] = w[ 2];
|
||||
pre_t[ 3] = w[ 3];
|
||||
pre_t[ 4] = w[ 4];
|
||||
pre_t[ 5] = w[ 5];
|
||||
pre_t[ 6] = w[ 6];
|
||||
pre_t[ 7] = w[ 7];
|
||||
pre_t[ 8] = w[ 8];
|
||||
pre_t[ 9] = w[ 9];
|
||||
pre_t[10] = w[10];
|
||||
pre_t[11] = w[11];
|
||||
pre_t[12] = w[12];
|
||||
pre_t[13] = w[13];
|
||||
pre_t[14] = w[14];
|
||||
pre_t[15] = w[15];
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 16; i < 68; i++)
|
||||
{
|
||||
pre_t[i] = SM3_EXPAND_S (pre_t[i - 16], pre_t[i - 9], pre_t[i - 3], pre_t[i - 13], pre_t[i - 6]);
|
||||
}
|
||||
|
||||
u32 w0l = w[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
|
||||
|
||||
const u32x w0 = w0l | w0r;
|
||||
|
||||
u32x t[68];
|
||||
|
||||
t[0] = w0;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 1; i < 65; i++)
|
||||
{
|
||||
t[i] = pre_t[i];
|
||||
}
|
||||
|
||||
const u32x fix16 = SM3_EXPAND ( w0, 0, 0, 0, 0);
|
||||
const u32x fix19 = SM3_EXPAND ( 0, 0, fix16, 0, 0);
|
||||
const u32x fix22 = SM3_EXPAND ( 0, 0, fix19, 0, fix16);
|
||||
const u32x fix25 = SM3_EXPAND ( 0, fix16, fix22, 0, fix19);
|
||||
const u32x fix28 = SM3_EXPAND ( 0, fix19, fix25, 0, fix22);
|
||||
const u32x fix29 = SM3_EXPAND ( 0, 0, 0, fix16, 0);
|
||||
const u32x fix31 = SM3_EXPAND ( 0, fix22, fix28, 0, fix25);
|
||||
const u32x fix32 = SM3_EXPAND (fix16, 0, fix29, fix19, 0);
|
||||
const u32x fix34 = SM3_EXPAND ( 0, fix25, fix31, 0, fix28);
|
||||
const u32x fix35 = SM3_EXPAND (fix19, 0, fix32, fix22, fix29);
|
||||
const u32x fix37 = SM3_EXPAND ( 0, fix28, fix34, 0, fix31);
|
||||
const u32x fix38 = SM3_EXPAND (fix22, fix29, fix35, fix25, fix32);
|
||||
const u32x fix40 = SM3_EXPAND ( 0, fix31, fix37, 0, fix34);
|
||||
const u32x fix41 = SM3_EXPAND (fix25, fix32, fix38, fix28, fix35);
|
||||
const u32x fix42 = SM3_EXPAND ( 0, 0, 0, fix29, 0);
|
||||
const u32x fix43 = SM3_EXPAND ( 0, fix34, fix40, 0, fix37);
|
||||
const u32x fix44 = SM3_EXPAND (fix28, fix35, fix41, fix31, fix38);
|
||||
const u32x fix45 = SM3_EXPAND (fix29, 0, fix42, fix32, 0);
|
||||
const u32x fix46 = SM3_EXPAND ( 0, fix37, fix43, 0, fix40);
|
||||
const u32x fix47 = SM3_EXPAND (fix31, fix38, fix44, fix34, fix41);
|
||||
const u32x fix48 = SM3_EXPAND (fix32, 0, fix45, fix35, fix42);
|
||||
const u32x fix49 = SM3_EXPAND ( 0, fix40, fix46, 0, fix43);
|
||||
const u32x fix50 = SM3_EXPAND (fix34, fix41, fix47, fix37, fix44);
|
||||
const u32x fix51 = SM3_EXPAND (fix35, fix42, fix48, fix38, fix45);
|
||||
const u32x fix52 = SM3_EXPAND ( 0, fix43, fix49, 0, fix46);
|
||||
const u32x fix53 = SM3_EXPAND (fix37, fix44, fix50, fix40, fix47);
|
||||
const u32x fix54 = SM3_EXPAND (fix38, fix45, fix51, fix41, fix48);
|
||||
const u32x fix55 = SM3_EXPAND ( 0, fix46, fix52, fix42, fix49);
|
||||
const u32x fix56 = SM3_EXPAND (fix40, fix47, fix53, fix43, fix50);
|
||||
const u32x fix57 = SM3_EXPAND (fix41, fix48, fix54, fix44, fix51);
|
||||
const u32x fix58 = SM3_EXPAND (fix42, fix49, fix55, fix45, fix52);
|
||||
const u32x fix59 = SM3_EXPAND (fix43, fix50, fix56, fix46, fix53);
|
||||
const u32x fix60 = SM3_EXPAND (fix44, fix51, fix57, fix47, fix54);
|
||||
const u32x fix61 = SM3_EXPAND (fix45, fix52, fix58, fix48, fix55);
|
||||
const u32x fix62 = SM3_EXPAND (fix46, fix53, fix59, fix49, fix56);
|
||||
const u32x fix63 = SM3_EXPAND (fix47, fix54, fix60, fix50, fix57);
|
||||
const u32x fix64 = SM3_EXPAND (fix48, fix55, fix61, fix51, fix58);
|
||||
|
||||
t[16] ^= fix16;
|
||||
t[19] ^= fix19;
|
||||
t[22] ^= fix22;
|
||||
t[25] ^= fix25;
|
||||
t[28] ^= fix28;
|
||||
t[29] ^= fix29;
|
||||
t[31] ^= fix31;
|
||||
t[32] ^= fix32;
|
||||
t[34] ^= fix34;
|
||||
t[35] ^= fix35;
|
||||
t[37] ^= fix37;
|
||||
t[38] ^= fix38;
|
||||
t[40] ^= fix40;
|
||||
t[41] ^= fix41;
|
||||
t[42] ^= fix42;
|
||||
t[43] ^= fix43;
|
||||
t[44] ^= fix44;
|
||||
t[45] ^= fix45;
|
||||
t[46] ^= fix46;
|
||||
t[47] ^= fix47;
|
||||
t[48] ^= fix48;
|
||||
t[49] ^= fix49;
|
||||
t[50] ^= fix50;
|
||||
t[51] ^= fix51;
|
||||
t[52] ^= fix52;
|
||||
t[53] ^= fix53;
|
||||
t[54] ^= fix54;
|
||||
t[55] ^= fix55;
|
||||
t[56] ^= fix56;
|
||||
t[57] ^= fix57;
|
||||
t[58] ^= fix58;
|
||||
t[59] ^= fix59;
|
||||
t[60] ^= fix60;
|
||||
t[61] ^= fix61;
|
||||
t[62] ^= fix62;
|
||||
t[63] ^= fix63;
|
||||
t[64] ^= fix64;
|
||||
|
||||
u32x a = SM3_IV_A;
|
||||
u32x b = SM3_IV_B;
|
||||
u32x c = SM3_IV_C;
|
||||
u32x d = SM3_IV_D;
|
||||
u32x e = SM3_IV_E;
|
||||
u32x f = SM3_IV_F;
|
||||
u32x g = SM3_IV_G;
|
||||
u32x h = SM3_IV_H;
|
||||
|
||||
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T00, t[ 0], t[ 0] ^ t[ 4]);
|
||||
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T01, t[ 1], t[ 1] ^ t[ 5]);
|
||||
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T02, t[ 2], t[ 2] ^ t[ 6]);
|
||||
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T03, t[ 3], t[ 3] ^ t[ 7]);
|
||||
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T04, t[ 4], t[ 4] ^ t[ 8]);
|
||||
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T05, t[ 5], t[ 5] ^ t[ 9]);
|
||||
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T06, t[ 6], t[ 6] ^ t[10]);
|
||||
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T07, t[ 7], t[ 7] ^ t[11]);
|
||||
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T08, t[ 8], t[ 8] ^ t[12]);
|
||||
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T09, t[ 9], t[ 9] ^ t[13]);
|
||||
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T10, t[10], t[10] ^ t[14]);
|
||||
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T11, t[11], t[11] ^ t[15]);
|
||||
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T12, t[12], t[12] ^ t[16]);
|
||||
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T13, t[13], t[13] ^ t[17]);
|
||||
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T14, t[14], t[14] ^ t[18]);
|
||||
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T15, t[15], t[15] ^ t[19]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T16, t[16], t[16] ^ t[20]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T17, t[17], t[17] ^ t[21]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T18, t[18], t[18] ^ t[22]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T19, t[19], t[19] ^ t[23]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T20, t[20], t[20] ^ t[24]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T21, t[21], t[21] ^ t[25]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T22, t[22], t[22] ^ t[26]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T23, t[23], t[23] ^ t[27]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T24, t[24], t[24] ^ t[28]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T25, t[25], t[25] ^ t[29]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T26, t[26], t[26] ^ t[30]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T27, t[27], t[27] ^ t[31]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T28, t[28], t[28] ^ t[32]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T29, t[29], t[29] ^ t[33]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T30, t[30], t[30] ^ t[34]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T31, t[31], t[31] ^ t[35]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T32, t[32], t[32] ^ t[36]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T33, t[33], t[33] ^ t[37]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T34, t[34], t[34] ^ t[38]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T35, t[35], t[35] ^ t[39]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T36, t[36], t[36] ^ t[40]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T37, t[37], t[37] ^ t[41]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T38, t[38], t[38] ^ t[42]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T39, t[39], t[39] ^ t[43]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T40, t[40], t[40] ^ t[44]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T41, t[41], t[41] ^ t[45]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T42, t[42], t[42] ^ t[46]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T43, t[43], t[43] ^ t[47]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T44, t[44], t[44] ^ t[48]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T45, t[45], t[45] ^ t[49]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T46, t[46], t[46] ^ t[50]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T47, t[47], t[47] ^ t[51]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T48, t[48], t[48] ^ t[52]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T49, t[49], t[49] ^ t[53]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T50, t[50], t[50] ^ t[54]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T51, t[51], t[51] ^ t[55]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T52, t[52], t[52] ^ t[56]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T53, t[53], t[53] ^ t[57]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T54, t[54], t[54] ^ t[58]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T55, t[55], t[55] ^ t[59]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T56, t[56], t[56] ^ t[60]);
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T57, t[57], t[57] ^ t[61]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T58, t[58], t[58] ^ t[62]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T59, t[59], t[59] ^ t[63]);
|
||||
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T60, t[60], t[60] ^ t[64]);
|
||||
|
||||
if (MATCHES_NONE_VS (d, d_rev)) continue;
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 65; i < 68; i++)
|
||||
{
|
||||
t[i] = pre_t[i];
|
||||
}
|
||||
|
||||
const u32x fix65 = SM3_EXPAND (fix49, fix56, fix62, fix52, fix59);
|
||||
const u32x fix66 = SM3_EXPAND (fix50, fix57, fix63, fix53, fix60);
|
||||
const u32x fix67 = SM3_EXPAND (fix51, fix58, fix64, fix54, fix61);
|
||||
|
||||
t[65] ^= fix65;
|
||||
t[66] ^= fix66;
|
||||
t[67] ^= fix67;
|
||||
|
||||
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T61, t[61], t[61] ^ t[65]);
|
||||
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T62, t[62], t[62] ^ t[66]);
|
||||
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T63, t[63], t[63] ^ t[67]);
|
||||
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
KERNEL_FQ void m31100_m04 (KERN_ATTR_VECTOR ())
|
||||
{
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user