mirror of
https://github.com/hashcat/hashcat
synced 2024-12-01 20:18:12 +01:00
fixes #1298: add pure kernels for -m 600 = BLAKE2b-512
This commit is contained in:
parent
5628317de8
commit
bd9304724c
6776
OpenCL/inc_common.cl
6776
OpenCL/inc_common.cl
File diff suppressed because it is too large
Load Diff
@ -262,6 +262,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3
|
||||
DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *c0, u32x *c1, u32x *c2, u32x *c3, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u32x *c0, u32x *c1, u32x *c2, u32x *c3, u32x *c4, u32x *c5, u32x *c6, u32x *c7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u32x *c0, u32x *c1, u32x *c2, u32x *c3, u32x *c4, u32x *c5, u32x *c6, u32x *c7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset);
|
||||
@ -289,6 +290,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32
|
||||
DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *c0, u32 *c1, u32 *c2, u32 *c3, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, u32 *c0, u32 *c1, u32 *c2, u32 *c3, u32 *c4, u32 *c5, u32 *c6, u32 *c7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, u32 *c0, u32 *c1, u32 *c2, u32 *c3, u32 *c4, u32 *c5, u32 *c6, u32 *c7, const u32 offset);
|
||||
DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset);
|
||||
|
662
OpenCL/inc_hash_blake2b.cl
Normal file
662
OpenCL/inc_hash_blake2b.cl
Normal file
@ -0,0 +1,662 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.h"
|
||||
#include "inc_common.h"
|
||||
#include "inc_hash_blake2b.h"
|
||||
|
||||
/**
 * BLAKE2b compression step (scalar): mixes one 16-word message block into the
 * 8-word chaining state.
 *
 * h   - chaining state (8 x u64), read and then overwritten with the new state
 * m   - message block (16 x u64), indexed by the BLAKE2B_ROUND / BLAKE2B_G macros
 * len - byte counter; it is widened to 64 bit with a zero upper half and XORed
 *       into v[12] (the t0 word)
 * f0  - finalization word XORed into v[14]; callers in this file pass
 *       BLAKE2B_UPDATE or BLAKE2B_FINAL
 *
 * The t1 and f1 words are not mixed in, i.e. they are treated as zero.
 */
DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0)
{
  u64 v[16];

  // lower half of the work vector: current chaining state

  v[ 0] = h[0];
  v[ 1] = h[1];
  v[ 2] = h[2];
  v[ 3] = h[3];
  v[ 4] = h[4];
  v[ 5] = h[5];
  v[ 6] = h[6];
  v[ 7] = h[7];

  // upper half: IV, with counter and finalization word folded in

  const u64 counter_lo = hl32_to_64_S (0, len);

  v[ 8] = BLAKE2B_IV_00;
  v[ 9] = BLAKE2B_IV_01;
  v[10] = BLAKE2B_IV_02;
  v[11] = BLAKE2B_IV_03;
  v[12] = BLAKE2B_IV_04 ^ counter_lo;
  v[13] = BLAKE2B_IV_05; // high counter word (t1) is not used
  v[14] = BLAKE2B_IV_06 ^ f0;
  v[15] = BLAKE2B_IV_07; // second finalization word (f1) is not used

  // 12 rounds; the arguments are the message word indices used by each round

  BLAKE2B_ROUND ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);
  BLAKE2B_ROUND (11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4);
  BLAKE2B_ROUND ( 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8);
  BLAKE2B_ROUND ( 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13);
  BLAKE2B_ROUND ( 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9);
  BLAKE2B_ROUND (12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11);
  BLAKE2B_ROUND (13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10);
  BLAKE2B_ROUND ( 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5);
  BLAKE2B_ROUND (10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0);
  BLAKE2B_ROUND ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);

  // feed-forward: fold both halves of the work vector back into the state

  h[0] ^= v[0] ^ v[ 8];
  h[1] ^= v[1] ^ v[ 9];
  h[2] ^= v[2] ^ v[10];
  h[3] ^= v[3] ^ v[11];
  h[4] ^= v[4] ^ v[12];
  h[5] ^= v[5] ^ v[13];
  h[6] ^= v[6] ^ v[14];
  h[7] ^= v[7] ^ v[15];
}
|
||||
|
||||
/**
 * Reset a scalar BLAKE2b context: empty message buffer, zero length and the
 * initial chaining state.
 *
 * ctx - context to initialize; every field (h, m, len) is overwritten
 */
DECLSPEC void blake2b_init (blake2b_ctx_t *ctx)
{
  ctx->len = 0;

  for (int i = 0; i < 16; i++)
  {
    ctx->m[i] = 0;
  }

  // h[0] is the first IV word XORed with the parameter word 0x01010040
  // (default output length: 0x40 = 64 bytes)

  ctx->h[0] = BLAKE2B_IV_00 ^ 0x01010040;
  ctx->h[1] = BLAKE2B_IV_01;
  ctx->h[2] = BLAKE2B_IV_02;
  ctx->h[3] = BLAKE2B_IV_03;
  ctx->h[4] = BLAKE2B_IV_04;
  ctx->h[5] = BLAKE2B_IV_05;
  ctx->h[6] = BLAKE2B_IV_06;
  ctx->h[7] = BLAKE2B_IV_07;
}
|
||||
|
||||
/**
 * Absorb at most one 128-byte chunk into a scalar BLAKE2b context.
 *
 * ctx      - streaming context; ctx->m, ctx->h and ctx->len are updated
 * w0 .. w7 - input chunk as 8 x 4 u32 words. When the context buffer is already
 *            partially filled the arrays are passed (non-const) to
 *            switch_buffer_by_offset_8x4_le_S () or its carry variant, so callers
 *            should not rely on their contents afterwards.
 * len      - number of input bytes in this chunk (the callers in this file pass
 *            values in the range 0 .. 128)
 *
 * A buffer that becomes exactly full is deliberately NOT compressed right away. It
 * stays in ctx->m until more data arrives, so that blake2b_final () can compress
 * the last block with BLAKE2B_FINAL.
 *
 * NOTE(review): whole words are copied / ORed into ctx->m, so this presumably
 * relies on the bytes of w0 .. w7 beyond len being zero - confirm against callers.
 */
DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const u32 len)
{
  // Nothing to absorb. Without this guard a zero-length update that arrives while a
  // full block is buffered (ctx->len is a non-zero multiple of 128) would compress
  // that block as a non-final block and then overwrite the buffer, so that
  // blake2b_final () would finalize the wrong block.

  if (len == 0) return;

  MAYBE_VOLATILE const u32 pos = ctx->len & 127; // fill level of ctx->m in bytes

  if (pos == 0)
  {
    if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform
    {
      blake2b_transform (ctx->h, ctx->m, ctx->len, BLAKE2B_UPDATE);
    }

    // buffer is empty (or was just flushed): the chunk becomes the new buffer

    ctx->m[ 0] = hl32_to_64_S (w0[1], w0[0]);
    ctx->m[ 1] = hl32_to_64_S (w0[3], w0[2]);
    ctx->m[ 2] = hl32_to_64_S (w1[1], w1[0]);
    ctx->m[ 3] = hl32_to_64_S (w1[3], w1[2]);
    ctx->m[ 4] = hl32_to_64_S (w2[1], w2[0]);
    ctx->m[ 5] = hl32_to_64_S (w2[3], w2[2]);
    ctx->m[ 6] = hl32_to_64_S (w3[1], w3[0]);
    ctx->m[ 7] = hl32_to_64_S (w3[3], w3[2]);
    ctx->m[ 8] = hl32_to_64_S (w4[1], w4[0]);
    ctx->m[ 9] = hl32_to_64_S (w4[3], w4[2]);
    ctx->m[10] = hl32_to_64_S (w5[1], w5[0]);
    ctx->m[11] = hl32_to_64_S (w5[3], w5[2]);
    ctx->m[12] = hl32_to_64_S (w6[1], w6[0]);
    ctx->m[13] = hl32_to_64_S (w6[3], w6[2]);
    ctx->m[14] = hl32_to_64_S (w7[1], w7[0]);
    ctx->m[15] = hl32_to_64_S (w7[3], w7[2]);
  }
  else
  {
    if ((pos + len) <= 128)
    {
      // chunk fits behind the buffered bytes: move it to offset pos and merge

      switch_buffer_by_offset_8x4_le_S (w0, w1, w2, w3, w4, w5, w6, w7, pos);

      ctx->m[ 0] |= hl32_to_64_S (w0[1], w0[0]);
      ctx->m[ 1] |= hl32_to_64_S (w0[3], w0[2]);
      ctx->m[ 2] |= hl32_to_64_S (w1[1], w1[0]);
      ctx->m[ 3] |= hl32_to_64_S (w1[3], w1[2]);
      ctx->m[ 4] |= hl32_to_64_S (w2[1], w2[0]);
      ctx->m[ 5] |= hl32_to_64_S (w2[3], w2[2]);
      ctx->m[ 6] |= hl32_to_64_S (w3[1], w3[0]);
      ctx->m[ 7] |= hl32_to_64_S (w3[3], w3[2]);
      ctx->m[ 8] |= hl32_to_64_S (w4[1], w4[0]);
      ctx->m[ 9] |= hl32_to_64_S (w4[3], w4[2]);
      ctx->m[10] |= hl32_to_64_S (w5[1], w5[0]);
      ctx->m[11] |= hl32_to_64_S (w5[3], w5[2]);
      ctx->m[12] |= hl32_to_64_S (w6[1], w6[0]);
      ctx->m[13] |= hl32_to_64_S (w6[3], w6[2]);
      ctx->m[14] |= hl32_to_64_S (w7[1], w7[0]);
      ctx->m[15] |= hl32_to_64_S (w7[3], w7[2]);
    }
    else
    {
      // chunk overflows the buffer: the part that does not fit ends up in c0 .. c7

      u32 c0[4] = { 0 };
      u32 c1[4] = { 0 };
      u32 c2[4] = { 0 };
      u32 c3[4] = { 0 };
      u32 c4[4] = { 0 };
      u32 c5[4] = { 0 };
      u32 c6[4] = { 0 };
      u32 c7[4] = { 0 };

      switch_buffer_by_offset_8x4_carry_le_S (w0, w1, w2, w3, w4, w5, w6, w7, c0, c1, c2, c3, c4, c5, c6, c7, pos);

      ctx->m[ 0] |= hl32_to_64_S (w0[1], w0[0]);
      ctx->m[ 1] |= hl32_to_64_S (w0[3], w0[2]);
      ctx->m[ 2] |= hl32_to_64_S (w1[1], w1[0]);
      ctx->m[ 3] |= hl32_to_64_S (w1[3], w1[2]);
      ctx->m[ 4] |= hl32_to_64_S (w2[1], w2[0]);
      ctx->m[ 5] |= hl32_to_64_S (w2[3], w2[2]);
      ctx->m[ 6] |= hl32_to_64_S (w3[1], w3[0]);
      ctx->m[ 7] |= hl32_to_64_S (w3[3], w3[2]);
      ctx->m[ 8] |= hl32_to_64_S (w4[1], w4[0]);
      ctx->m[ 9] |= hl32_to_64_S (w4[3], w4[2]);
      ctx->m[10] |= hl32_to_64_S (w5[1], w5[0]);
      ctx->m[11] |= hl32_to_64_S (w5[3], w5[2]);
      ctx->m[12] |= hl32_to_64_S (w6[1], w6[0]);
      ctx->m[13] |= hl32_to_64_S (w6[3], w6[2]);
      ctx->m[14] |= hl32_to_64_S (w7[1], w7[0]);
      ctx->m[15] |= hl32_to_64_S (w7[3], w7[2]);

      // len must be a multiple of 128 (not ctx->len) for BLAKE2B_UPDATE:

      const u32 cur_len = ((ctx->len + len) / 128) * 128;

      blake2b_transform (ctx->h, ctx->m, cur_len, BLAKE2B_UPDATE);

      // the carried-over bytes start the next block

      ctx->m[ 0] = hl32_to_64_S (c0[1], c0[0]);
      ctx->m[ 1] = hl32_to_64_S (c0[3], c0[2]);
      ctx->m[ 2] = hl32_to_64_S (c1[1], c1[0]);
      ctx->m[ 3] = hl32_to_64_S (c1[3], c1[2]);
      ctx->m[ 4] = hl32_to_64_S (c2[1], c2[0]);
      ctx->m[ 5] = hl32_to_64_S (c2[3], c2[2]);
      ctx->m[ 6] = hl32_to_64_S (c3[1], c3[0]);
      ctx->m[ 7] = hl32_to_64_S (c3[3], c3[2]);
      ctx->m[ 8] = hl32_to_64_S (c4[1], c4[0]);
      ctx->m[ 9] = hl32_to_64_S (c4[3], c4[2]);
      ctx->m[10] = hl32_to_64_S (c5[1], c5[0]);
      ctx->m[11] = hl32_to_64_S (c5[3], c5[2]);
      ctx->m[12] = hl32_to_64_S (c6[1], c6[0]);
      ctx->m[13] = hl32_to_64_S (c6[3], c6[2]);
      ctx->m[14] = hl32_to_64_S (c7[1], c7[0]);
      ctx->m[15] = hl32_to_64_S (c7[3], c7[2]);
    }
  }

  ctx->len += len;
}
|
||||
|
||||
/**
 * Absorb len bytes from a word buffer into a scalar BLAKE2b context.
 *
 * ctx - streaming context, updated through blake2b_update_128 ()
 * w   - input as little packed u32 words; 32 words are read per chunk
 * len - number of input bytes
 *
 * The input is fed in 128-byte chunks. The last chunk (which carries the remaining
 * len - pos1 bytes) is always processed outside the loop, and it always reads a
 * full 32 words from w regardless of how many bytes remain.
 */
DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len)
{
  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];
  u32 w4[4];
  u32 w5[4];
  u32 w6[4];
  u32 w7[4];

  // signed on purpose: the value is negative whenever len < 128

  const int limit = (const int) len - 128;

  int pos1 = 0; // offset into the input in bytes
  int pos4 = 0; // the same offset in u32 words

  // all chunks except the last one are full 128-byte chunks

  while (pos1 < limit)
  {
    for (int i = 0; i < 4; i++)
    {
      w0[i] = w[pos4 +  0 + i];
      w1[i] = w[pos4 +  4 + i];
      w2[i] = w[pos4 +  8 + i];
      w3[i] = w[pos4 + 12 + i];
      w4[i] = w[pos4 + 16 + i];
      w5[i] = w[pos4 + 20 + i];
      w6[i] = w[pos4 + 24 + i];
      w7[i] = w[pos4 + 28 + i];
    }

    blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);

    pos1 += 128;
    pos4 += 32;
  }

  // last chunk: whatever is left of the input

  for (int i = 0; i < 4; i++)
  {
    w0[i] = w[pos4 +  0 + i];
    w1[i] = w[pos4 +  4 + i];
    w2[i] = w[pos4 +  8 + i];
    w3[i] = w[pos4 + 12 + i];
    w4[i] = w[pos4 + 16 + i];
    w5[i] = w[pos4 + 20 + i];
    w6[i] = w[pos4 + 24 + i];
    w7[i] = w[pos4 + 28 + i];
  }

  blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - (u32) pos1);
}
|
||||
|
||||
/**
 * Same as blake2b_update (), but the input pointer is qualified with GLOBAL_AS.
 *
 * ctx - streaming context, updated through blake2b_update_128 ()
 * w   - input as packed u32 words; 32 words are read per chunk
 * len - number of input bytes
 *
 * The input is fed in 128-byte chunks. The last chunk (which carries the remaining
 * len - pos1 bytes) is always processed outside the loop, and it always reads a
 * full 32 words from w regardless of how many bytes remain.
 */
DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len)
{
  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];
  u32 w4[4];
  u32 w5[4];
  u32 w6[4];
  u32 w7[4];

  // signed on purpose: the value is negative whenever len < 128

  const int limit = (const int) len - 128;

  int pos1 = 0; // offset into the input in bytes
  int pos4 = 0; // the same offset in u32 words

  // all chunks except the last one are full 128-byte chunks

  while (pos1 < limit)
  {
    for (int i = 0; i < 4; i++)
    {
      w0[i] = w[pos4 +  0 + i];
      w1[i] = w[pos4 +  4 + i];
      w2[i] = w[pos4 +  8 + i];
      w3[i] = w[pos4 + 12 + i];
      w4[i] = w[pos4 + 16 + i];
      w5[i] = w[pos4 + 20 + i];
      w6[i] = w[pos4 + 24 + i];
      w7[i] = w[pos4 + 28 + i];
    }

    blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);

    pos1 += 128;
    pos4 += 32;
  }

  // last chunk: whatever is left of the input

  for (int i = 0; i < 4; i++)
  {
    w0[i] = w[pos4 +  0 + i];
    w1[i] = w[pos4 +  4 + i];
    w2[i] = w[pos4 +  8 + i];
    w3[i] = w[pos4 + 12 + i];
    w4[i] = w[pos4 + 16 + i];
    w5[i] = w[pos4 + 20 + i];
    w6[i] = w[pos4 + 24 + i];
    w7[i] = w[pos4 + 28 + i];
  }

  blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - (u32) pos1);
}
|
||||
|
||||
/**
 * Finish a scalar BLAKE2b computation: compresses the block buffered in ctx->m
 * with the total length ctx->len and the BLAKE2B_FINAL word.
 *
 * ctx - streaming context; ctx->h is updated in place by blake2b_transform ()
 */
DECLSPEC void blake2b_final (blake2b_ctx_t *ctx)
{
  const u32 total_len = ctx->len;

  blake2b_transform (ctx->h, ctx->m, total_len, BLAKE2B_FINAL);
}
|
||||
|
||||
/**
 * BLAKE2b compression step (vector types): mixes one 16-word message block into
 * the 8-word chaining state.
 *
 * h   - chaining state (8 x u64x), read and then overwritten with the new state
 * m   - message block (16 x u64x), indexed by the BLAKE2B_ROUND_VECTOR /
 *       BLAKE2B_G_VECTOR macros
 * len - byte counter; it is widened to 64 bit with a zero upper half and XORed
 *       into v[12] (the t0 word)
 * f0  - finalization word XORed into v[14]; callers in this file pass
 *       BLAKE2B_UPDATE or BLAKE2B_FINAL
 *
 * The t1 and f1 words are not mixed in, i.e. they are treated as zero.
 */
DECLSPEC void blake2b_transform_vector (u64x *h, const u64x *m, const u32x len, const u64 f0)
{
  u64x v[16];

  // lower half of the work vector: current chaining state

  v[ 0] = h[0];
  v[ 1] = h[1];
  v[ 2] = h[2];
  v[ 3] = h[3];
  v[ 4] = h[4];
  v[ 5] = h[5];
  v[ 6] = h[6];
  v[ 7] = h[7];

  // upper half: IV, with counter and finalization word folded in

  const u64x counter_lo = hl32_to_64 (0, len);

  v[ 8] = BLAKE2B_IV_00;
  v[ 9] = BLAKE2B_IV_01;
  v[10] = BLAKE2B_IV_02;
  v[11] = BLAKE2B_IV_03;
  v[12] = BLAKE2B_IV_04 ^ counter_lo;
  v[13] = BLAKE2B_IV_05; // high counter word (t1) is not used
  v[14] = BLAKE2B_IV_06 ^ f0;
  v[15] = BLAKE2B_IV_07; // second finalization word (f1) is not used

  // 12 rounds; the arguments are the message word indices used by each round

  BLAKE2B_ROUND_VECTOR ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND_VECTOR (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);
  BLAKE2B_ROUND_VECTOR (11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4);
  BLAKE2B_ROUND_VECTOR ( 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8);
  BLAKE2B_ROUND_VECTOR ( 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13);
  BLAKE2B_ROUND_VECTOR ( 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9);
  BLAKE2B_ROUND_VECTOR (12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11);
  BLAKE2B_ROUND_VECTOR (13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10);
  BLAKE2B_ROUND_VECTOR ( 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5);
  BLAKE2B_ROUND_VECTOR (10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0);
  BLAKE2B_ROUND_VECTOR ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND_VECTOR (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);

  // feed-forward: fold both halves of the work vector back into the state

  h[0] ^= v[0] ^ v[ 8];
  h[1] ^= v[1] ^ v[ 9];
  h[2] ^= v[2] ^ v[10];
  h[3] ^= v[3] ^ v[11];
  h[4] ^= v[4] ^ v[12];
  h[5] ^= v[5] ^ v[13];
  h[6] ^= v[6] ^ v[14];
  h[7] ^= v[7] ^ v[15];
}
|
||||
|
||||
/**
 * Reset a vector BLAKE2b context: empty message buffer, zero length and the
 * initial chaining state.
 *
 * ctx - context to initialize; every field (h, m, len) is overwritten
 */
DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx)
{
  ctx->len = 0;

  for (int i = 0; i < 16; i++)
  {
    ctx->m[i] = 0;
  }

  // h[0] is the first IV word XORed with the parameter word 0x01010040
  // (default output length: 0x40 = 64 bytes)

  ctx->h[0] = BLAKE2B_IV_00 ^ 0x01010040;
  ctx->h[1] = BLAKE2B_IV_01;
  ctx->h[2] = BLAKE2B_IV_02;
  ctx->h[3] = BLAKE2B_IV_03;
  ctx->h[4] = BLAKE2B_IV_04;
  ctx->h[5] = BLAKE2B_IV_05;
  ctx->h[6] = BLAKE2B_IV_06;
  ctx->h[7] = BLAKE2B_IV_07;
}
|
||||
|
||||
/**
 * Absorb at most one 128-byte chunk into a vector BLAKE2b context.
 *
 * ctx      - streaming context; ctx->m, ctx->h and ctx->len are updated
 * w0 .. w7 - input chunk as 8 x 4 u32x words. When the context buffer is already
 *            partially filled the arrays are passed (non-const) to
 *            switch_buffer_by_offset_8x4_le () or its carry variant, so callers
 *            should not rely on their contents afterwards.
 * len      - number of input bytes in this chunk; a single scalar value that
 *            applies to the whole vector (the caller in this file passes
 *            values in the range 0 .. 128)
 *
 * A buffer that becomes exactly full is deliberately NOT compressed right away. It
 * stays in ctx->m until more data arrives, so that blake2b_final_vector () can
 * compress the last block with BLAKE2B_FINAL.
 *
 * NOTE(review): whole words are copied / ORed into ctx->m, so this presumably
 * relies on the bytes of w0 .. w7 beyond len being zero - confirm against callers.
 */
DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const u32 len)
{
  // Nothing to absorb. Without this guard a zero-length update that arrives while a
  // full block is buffered (ctx->len is a non-zero multiple of 128) would compress
  // that block as a non-final block and then overwrite the buffer, so that
  // blake2b_final_vector () would finalize the wrong block.

  if (len == 0) return;

  MAYBE_VOLATILE const u32 pos = ctx->len & 127; // fill level of ctx->m in bytes

  if (pos == 0)
  {
    if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform
    {
      blake2b_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2B_UPDATE);
    }

    // buffer is empty (or was just flushed): the chunk becomes the new buffer

    ctx->m[ 0] = hl32_to_64 (w0[1], w0[0]);
    ctx->m[ 1] = hl32_to_64 (w0[3], w0[2]);
    ctx->m[ 2] = hl32_to_64 (w1[1], w1[0]);
    ctx->m[ 3] = hl32_to_64 (w1[3], w1[2]);
    ctx->m[ 4] = hl32_to_64 (w2[1], w2[0]);
    ctx->m[ 5] = hl32_to_64 (w2[3], w2[2]);
    ctx->m[ 6] = hl32_to_64 (w3[1], w3[0]);
    ctx->m[ 7] = hl32_to_64 (w3[3], w3[2]);
    ctx->m[ 8] = hl32_to_64 (w4[1], w4[0]);
    ctx->m[ 9] = hl32_to_64 (w4[3], w4[2]);
    ctx->m[10] = hl32_to_64 (w5[1], w5[0]);
    ctx->m[11] = hl32_to_64 (w5[3], w5[2]);
    ctx->m[12] = hl32_to_64 (w6[1], w6[0]);
    ctx->m[13] = hl32_to_64 (w6[3], w6[2]);
    ctx->m[14] = hl32_to_64 (w7[1], w7[0]);
    ctx->m[15] = hl32_to_64 (w7[3], w7[2]);
  }
  else
  {
    if ((pos + len) <= 128)
    {
      // chunk fits behind the buffered bytes: move it to offset pos and merge

      switch_buffer_by_offset_8x4_le (w0, w1, w2, w3, w4, w5, w6, w7, pos);

      ctx->m[ 0] |= hl32_to_64 (w0[1], w0[0]);
      ctx->m[ 1] |= hl32_to_64 (w0[3], w0[2]);
      ctx->m[ 2] |= hl32_to_64 (w1[1], w1[0]);
      ctx->m[ 3] |= hl32_to_64 (w1[3], w1[2]);
      ctx->m[ 4] |= hl32_to_64 (w2[1], w2[0]);
      ctx->m[ 5] |= hl32_to_64 (w2[3], w2[2]);
      ctx->m[ 6] |= hl32_to_64 (w3[1], w3[0]);
      ctx->m[ 7] |= hl32_to_64 (w3[3], w3[2]);
      ctx->m[ 8] |= hl32_to_64 (w4[1], w4[0]);
      ctx->m[ 9] |= hl32_to_64 (w4[3], w4[2]);
      ctx->m[10] |= hl32_to_64 (w5[1], w5[0]);
      ctx->m[11] |= hl32_to_64 (w5[3], w5[2]);
      ctx->m[12] |= hl32_to_64 (w6[1], w6[0]);
      ctx->m[13] |= hl32_to_64 (w6[3], w6[2]);
      ctx->m[14] |= hl32_to_64 (w7[1], w7[0]);
      ctx->m[15] |= hl32_to_64 (w7[3], w7[2]);
    }
    else
    {
      // chunk overflows the buffer: the part that does not fit ends up in c0 .. c7

      u32x c0[4] = { 0 };
      u32x c1[4] = { 0 };
      u32x c2[4] = { 0 };
      u32x c3[4] = { 0 };
      u32x c4[4] = { 0 };
      u32x c5[4] = { 0 };
      u32x c6[4] = { 0 };
      u32x c7[4] = { 0 };

      switch_buffer_by_offset_8x4_carry_le (w0, w1, w2, w3, w4, w5, w6, w7, c0, c1, c2, c3, c4, c5, c6, c7, pos);

      ctx->m[ 0] |= hl32_to_64 (w0[1], w0[0]);
      ctx->m[ 1] |= hl32_to_64 (w0[3], w0[2]);
      ctx->m[ 2] |= hl32_to_64 (w1[1], w1[0]);
      ctx->m[ 3] |= hl32_to_64 (w1[3], w1[2]);
      ctx->m[ 4] |= hl32_to_64 (w2[1], w2[0]);
      ctx->m[ 5] |= hl32_to_64 (w2[3], w2[2]);
      ctx->m[ 6] |= hl32_to_64 (w3[1], w3[0]);
      ctx->m[ 7] |= hl32_to_64 (w3[3], w3[2]);
      ctx->m[ 8] |= hl32_to_64 (w4[1], w4[0]);
      ctx->m[ 9] |= hl32_to_64 (w4[3], w4[2]);
      ctx->m[10] |= hl32_to_64 (w5[1], w5[0]);
      ctx->m[11] |= hl32_to_64 (w5[3], w5[2]);
      ctx->m[12] |= hl32_to_64 (w6[1], w6[0]);
      ctx->m[13] |= hl32_to_64 (w6[3], w6[2]);
      ctx->m[14] |= hl32_to_64 (w7[1], w7[0]);
      ctx->m[15] |= hl32_to_64 (w7[3], w7[2]);

      // len must be a multiple of 128 (not ctx->len) for BLAKE2B_UPDATE:

      const u32x cur_len = ((ctx->len + len) / 128) * 128;

      blake2b_transform_vector (ctx->h, ctx->m, cur_len, BLAKE2B_UPDATE);

      // the carried-over bytes start the next block

      ctx->m[ 0] = hl32_to_64 (c0[1], c0[0]);
      ctx->m[ 1] = hl32_to_64 (c0[3], c0[2]);
      ctx->m[ 2] = hl32_to_64 (c1[1], c1[0]);
      ctx->m[ 3] = hl32_to_64 (c1[3], c1[2]);
      ctx->m[ 4] = hl32_to_64 (c2[1], c2[0]);
      ctx->m[ 5] = hl32_to_64 (c2[3], c2[2]);
      ctx->m[ 6] = hl32_to_64 (c3[1], c3[0]);
      ctx->m[ 7] = hl32_to_64 (c3[3], c3[2]);
      ctx->m[ 8] = hl32_to_64 (c4[1], c4[0]);
      ctx->m[ 9] = hl32_to_64 (c4[3], c4[2]);
      ctx->m[10] = hl32_to_64 (c5[1], c5[0]);
      ctx->m[11] = hl32_to_64 (c5[3], c5[2]);
      ctx->m[12] = hl32_to_64 (c6[1], c6[0]);
      ctx->m[13] = hl32_to_64 (c6[3], c6[2]);
      ctx->m[14] = hl32_to_64 (c7[1], c7[0]);
      ctx->m[15] = hl32_to_64 (c7[3], c7[2]);
    }
  }

  ctx->len += len;
}
|
||||
|
||||
/**
 * Absorb len bytes from a vector word buffer into a vector BLAKE2b context.
 *
 * ctx - streaming context, updated through blake2b_update_vector_128 ()
 * w   - input as packed u32x words; 32 words are read per chunk
 * len - number of input bytes (one scalar value for the whole vector)
 *
 * The input is fed in 128-byte chunks. The last chunk (which carries the remaining
 * len - pos1 bytes) is always processed outside the loop, and it always reads a
 * full 32 words from w regardless of how many bytes remain.
 */
DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len)
{
  u32x w0[4];
  u32x w1[4];
  u32x w2[4];
  u32x w3[4];
  u32x w4[4];
  u32x w5[4];
  u32x w6[4];
  u32x w7[4];

  // signed on purpose: the value is negative whenever len < 128

  const int limit = (const int) len - 128;

  int pos1 = 0; // offset into the input in bytes
  int pos4 = 0; // the same offset in u32x words

  // all chunks except the last one are full 128-byte chunks

  while (pos1 < limit)
  {
    for (int i = 0; i < 4; i++)
    {
      w0[i] = w[pos4 +  0 + i];
      w1[i] = w[pos4 +  4 + i];
      w2[i] = w[pos4 +  8 + i];
      w3[i] = w[pos4 + 12 + i];
      w4[i] = w[pos4 + 16 + i];
      w5[i] = w[pos4 + 20 + i];
      w6[i] = w[pos4 + 24 + i];
      w7[i] = w[pos4 + 28 + i];
    }

    blake2b_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);

    pos1 += 128;
    pos4 += 32;
  }

  // last chunk: whatever is left of the input

  for (int i = 0; i < 4; i++)
  {
    w0[i] = w[pos4 +  0 + i];
    w1[i] = w[pos4 +  4 + i];
    w2[i] = w[pos4 +  8 + i];
    w3[i] = w[pos4 + 12 + i];
    w4[i] = w[pos4 + 16 + i];
    w5[i] = w[pos4 + 20 + i];
    w6[i] = w[pos4 + 24 + i];
    w7[i] = w[pos4 + 28 + i];
  }

  blake2b_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - (u32) pos1);
}
|
||||
|
||||
/**
 * Finish a vector BLAKE2b computation: compresses the block buffered in ctx->m
 * with the total length ctx->len (cast to u32x) and the BLAKE2B_FINAL word.
 *
 * ctx - streaming context; ctx->h is updated in place by blake2b_transform_vector ()
 */
DECLSPEC void blake2b_final_vector (blake2b_ctx_vector_t *ctx)
{
  const u32x total_len = (u32x) ctx->len;

  blake2b_transform_vector (ctx->h, ctx->m, total_len, BLAKE2B_FINAL);
}
|
90
OpenCL/inc_hash_blake2b.h
Normal file
90
OpenCL/inc_hash_blake2b.h
Normal file
@ -0,0 +1,90 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#ifndef _INC_HASH_BLAKE2B_H
|
||||
#define _INC_HASH_BLAKE2B_H
|
||||
|
||||
// Values for the f0 argument of blake2b_transform () / blake2b_transform_vector ().
// BLAKE2B_FINAL is parenthesized so the negative literal cannot fuse with a
// neighbouring operator; converted to u64 it is the all-ones word.

#define BLAKE2B_UPDATE 0
#define BLAKE2B_FINAL  (-1)

// The macros below expect two arrays to be in scope at the point of use:
//   m[16] - message words
//   v[16] - work vector (BLAKE2B_ROUND* only)
// They are wrapped in do { } while (0) so that `MACRO (...);` is exactly one
// statement, including inside an unbraced if / else.

// Mixing function G (scalar): mixes message words m[k0] and m[k1] into a, b, c, d.
// a, b, c and d must be lvalues; they are read and written several times.

#define BLAKE2B_G(k0,k1,a,b,c,d)                 \
  do                                             \
  {                                              \
    a = a + b + m[(k0)];                         \
    d = hc_rotr64_S (d ^ a, 32);                 \
    c = c + d;                                   \
    b = hc_rotr64_S (b ^ c, 24);                 \
    a = a + b + m[(k1)];                         \
    d = hc_rotr64_S (d ^ a, 16);                 \
    c = c + d;                                   \
    b = hc_rotr64_S (b ^ c, 63);                 \
  } while (0)

// One full round (scalar): four G calls on the columns of v, then four on the
// diagonals. c0 .. cf are the message word indices for this round.

#define BLAKE2B_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
  do                                                                   \
  {                                                                    \
    BLAKE2B_G ((c0), (c1), v[0], v[4], v[ 8], v[12]);                  \
    BLAKE2B_G ((c2), (c3), v[1], v[5], v[ 9], v[13]);                  \
    BLAKE2B_G ((c4), (c5), v[2], v[6], v[10], v[14]);                  \
    BLAKE2B_G ((c6), (c7), v[3], v[7], v[11], v[15]);                  \
    BLAKE2B_G ((c8), (c9), v[0], v[5], v[10], v[15]);                  \
    BLAKE2B_G ((ca), (cb), v[1], v[6], v[11], v[12]);                  \
    BLAKE2B_G ((cc), (cd), v[2], v[7], v[ 8], v[13]);                  \
    BLAKE2B_G ((ce), (cf), v[3], v[4], v[ 9], v[14]);                  \
  } while (0)

// Mixing function G (vector types): same as BLAKE2B_G but rotates with hc_rotr64 ().

#define BLAKE2B_G_VECTOR(k0,k1,a,b,c,d)          \
  do                                             \
  {                                              \
    a = a + b + m[(k0)];                         \
    d = hc_rotr64 (d ^ a, 32);                   \
    c = c + d;                                   \
    b = hc_rotr64 (b ^ c, 24);                   \
    a = a + b + m[(k1)];                         \
    d = hc_rotr64 (d ^ a, 16);                   \
    c = c + d;                                   \
    b = hc_rotr64 (b ^ c, 63);                   \
  } while (0)

// One full round (vector types): same schedule as BLAKE2B_ROUND.

#define BLAKE2B_ROUND_VECTOR(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
  do                                                                          \
  {                                                                           \
    BLAKE2B_G_VECTOR ((c0), (c1), v[0], v[4], v[ 8], v[12]);                  \
    BLAKE2B_G_VECTOR ((c2), (c3), v[1], v[5], v[ 9], v[13]);                  \
    BLAKE2B_G_VECTOR ((c4), (c5), v[2], v[6], v[10], v[14]);                  \
    BLAKE2B_G_VECTOR ((c6), (c7), v[3], v[7], v[11], v[15]);                  \
    BLAKE2B_G_VECTOR ((c8), (c9), v[0], v[5], v[10], v[15]);                  \
    BLAKE2B_G_VECTOR ((ca), (cb), v[1], v[6], v[11], v[12]);                  \
    BLAKE2B_G_VECTOR ((cc), (cd), v[2], v[7], v[ 8], v[13]);                  \
    BLAKE2B_G_VECTOR ((ce), (cf), v[3], v[4], v[ 9], v[14]);                  \
  } while (0)
|
||||
|
||||
typedef struct blake2b_ctx
|
||||
{
|
||||
u64 m[16]; // buffer
|
||||
u64 h[ 8]; // digest
|
||||
|
||||
u32 len;
|
||||
|
||||
} blake2b_ctx_t;
|
||||
|
||||
typedef struct blake2b_ctx_vector
|
||||
{
|
||||
u64x m[16]; // buffer
|
||||
u64x h[ 8]; // digest
|
||||
|
||||
u32 len;
|
||||
|
||||
} blake2b_ctx_vector_t;
|
||||
|
||||
DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0);
|
||||
DECLSPEC void blake2b_init (blake2b_ctx_t *ctx);
|
||||
DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len);
|
||||
DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len);
|
||||
DECLSPEC void blake2b_final (blake2b_ctx_t *ctx);
|
||||
|
||||
DECLSPEC void blake2b_transform_vector (u64x *h, const u64x *m, const u32x len, const u64 f0);
|
||||
DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx);
|
||||
DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len);
|
||||
DECLSPEC void blake2b_final_vector (blake2b_ctx_vector_t *ctx);
|
||||
|
||||
|
||||
#endif // _INC_HASH_BLAKE2B_H
|
@ -13,117 +13,15 @@
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_blake2b.cl"
|
||||
#endif
|
||||
|
||||
typedef struct blake2
|
||||
{
|
||||
u64 h[8];
|
||||
u64 t[2];
|
||||
u64 f[2];
|
||||
u32 buflen;
|
||||
u32 outlen;
|
||||
|
||||
} blake2_t;
|
||||
|
||||
// selector values for the isFinal argument of blake2b_transform ()

#define BLAKE2B_FINAL  1
#define BLAKE2B_UPDATE 0

// Mixing function G: mixes message words m[k0] and m[k1] into a, b, c, d.
// Expects an array m[] in scope; a, b, c and d must be lvalues.

#define BLAKE2B_G(k0,k1,a,b,c,d)   \
  do {                             \
    a = a + b + m[(k0)];           \
    d = hc_rotr64 (d ^ a, 32);     \
    c = c + d;                     \
    b = hc_rotr64 (b ^ c, 24);     \
    a = a + b + m[(k1)];           \
    d = hc_rotr64 (d ^ a, 16);     \
    c = c + d;                     \
    b = hc_rotr64 (b ^ c, 63);     \
  } while (0)

// One full round over the work vector v[]: four G calls on the columns, then
// four on the diagonals. c0 .. cf are the message word indices for this round.

#define BLAKE2B_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
  do {                                                                 \
    BLAKE2B_G ((c0),(c1),v[ 0],v[ 4],v[ 8],v[12]);                     \
    BLAKE2B_G ((c2),(c3),v[ 1],v[ 5],v[ 9],v[13]);                     \
    BLAKE2B_G ((c4),(c5),v[ 2],v[ 6],v[10],v[14]);                     \
    BLAKE2B_G ((c6),(c7),v[ 3],v[ 7],v[11],v[15]);                     \
    BLAKE2B_G ((c8),(c9),v[ 0],v[ 5],v[10],v[15]);                     \
    BLAKE2B_G ((ca),(cb),v[ 1],v[ 6],v[11],v[12]);                     \
    BLAKE2B_G ((cc),(cd),v[ 2],v[ 7],v[ 8],v[13]);                     \
    BLAKE2B_G ((ce),(cf),v[ 3],v[ 4],v[ 9],v[14]);                     \
  } while (0)
|
||||
|
||||
/**
 * Single-block BLAKE2b compression on vector types, using caller-provided
 * scratch arrays.
 *
 * h       - chaining state (8 words), updated in place
 * t       - counter words; out_len is added to t[0] before compressing
 * f       - finalization words; f[0] is set to -1 when isFinal is non-zero
 * m       - scratch, 16 words: filled from w0 .. w3, upper 8 words zeroed
 * v       - scratch, 16 words: work vector
 * w0 - w3 - input, 4 x 4 u32x words (64 bytes)
 * out_len - number of bytes added to the counter
 * isFinal - non-zero for the last block
 */
DECLSPEC void blake2b_transform (u64x *h, u64x *t, u64x *f, u64x *m, u64x *v, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x out_len, const u8 isFinal)
{
  if (isFinal)
  {
    f[0] = -1;
  }

  t[0] += hl32_to_64 (0, out_len);

  // message block: 64 input bytes, the rest is zero

  m[ 0] = hl32_to_64 (w0[1], w0[0]);
  m[ 1] = hl32_to_64 (w0[3], w0[2]);
  m[ 2] = hl32_to_64 (w1[1], w1[0]);
  m[ 3] = hl32_to_64 (w1[3], w1[2]);
  m[ 4] = hl32_to_64 (w2[1], w2[0]);
  m[ 5] = hl32_to_64 (w2[3], w2[2]);
  m[ 6] = hl32_to_64 (w3[1], w3[0]);
  m[ 7] = hl32_to_64 (w3[3], w3[2]);
  m[ 8] = 0;
  m[ 9] = 0;
  m[10] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;
  m[15] = 0;

  // work vector: chaining state followed by the IV mixed with counter and flags

  v[ 0] = h[0];
  v[ 1] = h[1];
  v[ 2] = h[2];
  v[ 3] = h[3];
  v[ 4] = h[4];
  v[ 5] = h[5];
  v[ 6] = h[6];
  v[ 7] = h[7];
  v[ 8] = BLAKE2B_IV_00;
  v[ 9] = BLAKE2B_IV_01;
  v[10] = BLAKE2B_IV_02;
  v[11] = BLAKE2B_IV_03;
  v[12] = BLAKE2B_IV_04 ^ t[0];
  v[13] = BLAKE2B_IV_05 ^ t[1];
  v[14] = BLAKE2B_IV_06 ^ f[0];
  v[15] = BLAKE2B_IV_07 ^ f[1];

  // 12 rounds; the arguments are the message word indices used by each round

  BLAKE2B_ROUND ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);
  BLAKE2B_ROUND (11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4);
  BLAKE2B_ROUND ( 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8);
  BLAKE2B_ROUND ( 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13);
  BLAKE2B_ROUND ( 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9);
  BLAKE2B_ROUND (12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11);
  BLAKE2B_ROUND (13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10);
  BLAKE2B_ROUND ( 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5);
  BLAKE2B_ROUND (10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0);
  BLAKE2B_ROUND ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
  BLAKE2B_ROUND (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);

  // feed-forward: fold both halves of the work vector back into the state

  h[0] ^= v[0] ^ v[ 8];
  h[1] ^= v[1] ^ v[ 9];
  h[2] ^= v[2] ^ v[10];
  h[3] ^= v[3] ^ v[11];
  h[4] ^= v[4] ^ v[12];
  h[5] ^= v[5] ^ v[13];
  h[6] ^= v[6] ^ v[14];
  h[7] ^= v[7] ^ v[15];
}
|
||||
|
||||
KERNEL_FQ void m00600_m04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m04 (KERN_ATTR_RULES ())
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
@ -142,24 +40,6 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len & 63;
|
||||
|
||||
u64 tmp_h[8];
|
||||
u64 tmp_t[2];
|
||||
u64 tmp_f[2];
|
||||
|
||||
tmp_h[0] = esalt_bufs[digests_offset].h[0];
|
||||
tmp_h[1] = esalt_bufs[digests_offset].h[1];
|
||||
tmp_h[2] = esalt_bufs[digests_offset].h[2];
|
||||
tmp_h[3] = esalt_bufs[digests_offset].h[3];
|
||||
tmp_h[4] = esalt_bufs[digests_offset].h[4];
|
||||
tmp_h[5] = esalt_bufs[digests_offset].h[5];
|
||||
tmp_h[6] = esalt_bufs[digests_offset].h[6];
|
||||
tmp_h[7] = esalt_bufs[digests_offset].h[7];
|
||||
|
||||
tmp_t[0] = esalt_bufs[digests_offset].t[0];
|
||||
tmp_t[1] = esalt_bufs[digests_offset].t[1];
|
||||
tmp_f[0] = esalt_bufs[digests_offset].f[0];
|
||||
tmp_f[1] = esalt_bufs[digests_offset].f[1];
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
@ -173,64 +53,61 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
|
||||
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
u64x digest[8];
|
||||
u64x m[16];
|
||||
u64x v[16];
|
||||
|
||||
m[ 0] = hl32_to_64 (w0[1], w0[0]);
|
||||
m[ 1] = hl32_to_64 (w0[3], w0[2]);
|
||||
m[ 2] = hl32_to_64 (w1[1], w1[0]);
|
||||
m[ 3] = hl32_to_64 (w1[3], w1[2]);
|
||||
m[ 4] = hl32_to_64 (w2[1], w2[0]);
|
||||
m[ 5] = hl32_to_64 (w2[3], w2[2]);
|
||||
m[ 6] = hl32_to_64 (w3[1], w3[0]);
|
||||
m[ 7] = hl32_to_64 (w3[3], w3[2]);
|
||||
m[ 8] = 0;
|
||||
m[ 9] = 0;
|
||||
m[10] = 0;
|
||||
m[11] = 0;
|
||||
m[12] = 0;
|
||||
m[13] = 0;
|
||||
m[14] = 0;
|
||||
m[15] = 0;
|
||||
|
||||
u64x h[8];
|
||||
u64x t[2];
|
||||
u64x f[2];
|
||||
|
||||
h[0] = tmp_h[0];
|
||||
h[1] = tmp_h[1];
|
||||
h[2] = tmp_h[2];
|
||||
h[3] = tmp_h[3];
|
||||
h[4] = tmp_h[4];
|
||||
h[5] = tmp_h[5];
|
||||
h[6] = tmp_h[6];
|
||||
h[7] = tmp_h[7];
|
||||
h[0] = BLAKE2B_IV_00 ^ 0x01010040;
|
||||
h[1] = BLAKE2B_IV_01;
|
||||
h[2] = BLAKE2B_IV_02;
|
||||
h[3] = BLAKE2B_IV_03;
|
||||
h[4] = BLAKE2B_IV_04;
|
||||
h[5] = BLAKE2B_IV_05;
|
||||
h[6] = BLAKE2B_IV_06;
|
||||
h[7] = BLAKE2B_IV_07;
|
||||
|
||||
t[0] = tmp_t[0];
|
||||
t[1] = tmp_t[1];
|
||||
f[0] = tmp_f[0];
|
||||
f[1] = tmp_f[1];
|
||||
blake2b_transform_vector (h, m, out_len, BLAKE2B_FINAL);
|
||||
|
||||
blake2b_transform (h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL);
|
||||
|
||||
digest[0] = h[0];
|
||||
digest[1] = h[1];
|
||||
digest[2] = h[2];
|
||||
digest[3] = h[3];
|
||||
digest[4] = h[4];
|
||||
digest[5] = h[5];
|
||||
digest[6] = h[6];
|
||||
digest[7] = h[7];
|
||||
|
||||
const u32x r0 = h32_from_64 (digest[0]);
|
||||
const u32x r1 = l32_from_64 (digest[0]);
|
||||
const u32x r2 = h32_from_64 (digest[1]);
|
||||
const u32x r3 = l32_from_64 (digest[1]);
|
||||
const u32x r0 = h32_from_64 (h[0]);
|
||||
const u32x r1 = l32_from_64 (h[0]);
|
||||
const u32x r2 = h32_from_64 (h[1]);
|
||||
const u32x r3 = l32_from_64 (h[1]);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_m08 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m08 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_m16 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m16 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s04 (KERN_ATTR_RULES ())
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
@ -249,24 +126,6 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len & 63;
|
||||
|
||||
u64 tmp_h[8];
|
||||
u64 tmp_t[2];
|
||||
u64 tmp_f[2];
|
||||
|
||||
tmp_h[0] = esalt_bufs[digests_offset].h[0];
|
||||
tmp_h[1] = esalt_bufs[digests_offset].h[1];
|
||||
tmp_h[2] = esalt_bufs[digests_offset].h[2];
|
||||
tmp_h[3] = esalt_bufs[digests_offset].h[3];
|
||||
tmp_h[4] = esalt_bufs[digests_offset].h[4];
|
||||
tmp_h[5] = esalt_bufs[digests_offset].h[5];
|
||||
tmp_h[6] = esalt_bufs[digests_offset].h[6];
|
||||
tmp_h[7] = esalt_bufs[digests_offset].h[7];
|
||||
|
||||
tmp_t[0] = esalt_bufs[digests_offset].t[0];
|
||||
tmp_t[1] = esalt_bufs[digests_offset].t[1];
|
||||
tmp_f[0] = esalt_bufs[digests_offset].f[0];
|
||||
tmp_f[1] = esalt_bufs[digests_offset].f[1];
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -292,52 +151,51 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
|
||||
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
u64x digest[8];
|
||||
u64x m[16];
|
||||
u64x v[16];
|
||||
|
||||
m[ 0] = hl32_to_64 (w0[1], w0[0]);
|
||||
m[ 1] = hl32_to_64 (w0[3], w0[2]);
|
||||
m[ 2] = hl32_to_64 (w1[1], w1[0]);
|
||||
m[ 3] = hl32_to_64 (w1[3], w1[2]);
|
||||
m[ 4] = hl32_to_64 (w2[1], w2[0]);
|
||||
m[ 5] = hl32_to_64 (w2[3], w2[2]);
|
||||
m[ 6] = hl32_to_64 (w3[1], w3[0]);
|
||||
m[ 7] = hl32_to_64 (w3[3], w3[2]);
|
||||
m[ 8] = 0;
|
||||
m[ 9] = 0;
|
||||
m[10] = 0;
|
||||
m[11] = 0;
|
||||
m[12] = 0;
|
||||
m[13] = 0;
|
||||
m[14] = 0;
|
||||
m[15] = 0;
|
||||
|
||||
u64x h[8];
|
||||
u64x t[2];
|
||||
u64x f[2];
|
||||
|
||||
h[0] = tmp_h[0];
|
||||
h[1] = tmp_h[1];
|
||||
h[2] = tmp_h[2];
|
||||
h[3] = tmp_h[3];
|
||||
h[4] = tmp_h[4];
|
||||
h[5] = tmp_h[5];
|
||||
h[6] = tmp_h[6];
|
||||
h[7] = tmp_h[7];
|
||||
h[0] = BLAKE2B_IV_00 ^ 0x01010040;
|
||||
h[1] = BLAKE2B_IV_01;
|
||||
h[2] = BLAKE2B_IV_02;
|
||||
h[3] = BLAKE2B_IV_03;
|
||||
h[4] = BLAKE2B_IV_04;
|
||||
h[5] = BLAKE2B_IV_05;
|
||||
h[6] = BLAKE2B_IV_06;
|
||||
h[7] = BLAKE2B_IV_07;
|
||||
|
||||
t[0] = tmp_t[0];
|
||||
t[1] = tmp_t[1];
|
||||
f[0] = tmp_f[0];
|
||||
f[1] = tmp_f[1];
|
||||
blake2b_transform_vector (h, m, out_len, BLAKE2B_FINAL);
|
||||
|
||||
blake2b_transform (h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL);
|
||||
|
||||
digest[0] = h[0];
|
||||
digest[1] = h[1];
|
||||
digest[2] = h[2];
|
||||
digest[3] = h[3];
|
||||
digest[4] = h[4];
|
||||
digest[5] = h[5];
|
||||
digest[6] = h[6];
|
||||
digest[7] = h[7];
|
||||
|
||||
const u32x r0 = h32_from_64 (digest[0]);
|
||||
const u32x r1 = l32_from_64 (digest[0]);
|
||||
const u32x r2 = h32_from_64 (digest[1]);
|
||||
const u32x r3 = l32_from_64 (digest[1]);
|
||||
const u32x r0 = h32_from_64 (h[0]);
|
||||
const u32x r1 = l32_from_64 (h[0]);
|
||||
const u32x r2 = h32_from_64 (h[1]);
|
||||
const u32x r3 = l32_from_64 (h[1]);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s08 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s08 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s16 (KERN_ATTR_RULES_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s16 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
111
OpenCL/m00600_a0-pure.cl
Normal file
111
OpenCL/m00600_a0-pure.cl
Normal file
@ -0,0 +1,111 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp.h"
|
||||
#include "inc_rp.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_blake2b.cl"
|
||||
#endif
|
||||
|
||||
/**
 * m00600_mxx - rule-based BLAKE2b kernel ending in COMPARE_M_SCALAR.
 *
 * Each work-item takes the base word pws[gid], applies every rule in
 * rules_buf[0 .. il_cnt-1] to a private copy of it, hashes the result with
 * blake2b_init / blake2b_update / blake2b_final, and passes the first two
 * 64-bit state words (split into four 32-bit words) to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m00600_mxx (KERN_ATTR_RULES ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no base word to process
  if (gid >= gid_max)
  {
    return;
  }

  // COPY_PW / PASTE_PW are defined elsewhere; presumably they stash the base
  // word once so that each rule iteration can start from a fresh copy.
  COPY_PW (pws[gid]);

  // NOTE(review): `gid` and `il_pos` keep their exact names because the
  // COMPARE_* macros are defined outside this file and may refer to them.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    pw_t cand = PASTE_PW;

    // apply_rules returns the length of the rewritten candidate
    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    blake2b_ctx_t hash_ctx;

    blake2b_init   (&hash_ctx);
    blake2b_update (&hash_ctx, cand.i, cand.pw_len);
    blake2b_final  (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32 h0_hi = h32_from_64_S (hash_ctx.h[0]);
    const u32 h0_lo = l32_from_64_S (hash_ctx.h[0]);
    const u32 h1_hi = h32_from_64_S (hash_ctx.h[1]);
    const u32 h1_lo = l32_from_64_S (hash_ctx.h[1]);

    COMPARE_M_SCALAR (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
||||
|
||||
/**
 * m00600_sxx - rule-based BLAKE2b kernel ending in COMPARE_S_SCALAR.
 *
 * Same flow as the rule-based mxx kernel: the base word pws[gid] is copied,
 * each rule in rules_buf[0 .. il_cnt-1] is applied to the copy, the result
 * is hashed with blake2b_init / blake2b_update / blake2b_final, and the
 * first two 64-bit state words are split into four 32-bit words. The
 * difference is that the four target words are loaded up front into
 * `search` and the comparison is done by COMPARE_S_SCALAR.
 */
KERNEL_FQ void m00600_sxx (KERN_ATTR_RULES ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no base word to process
  if (gid >= gid_max)
  {
    return;
  }

  // Target digest words, picked via the DGST_R0..DGST_R3 positions.
  // NOTE(review): `search` is not read in this function body; it is
  // presumably consumed by COMPARE_S_SCALAR, so its name must not change.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  // COPY_PW / PASTE_PW are defined elsewhere; presumably they stash the base
  // word once so that each rule iteration can start from a fresh copy.
  COPY_PW (pws[gid]);

  // `gid` and `il_pos` also keep their names for the benefit of the macro.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    pw_t cand = PASTE_PW;

    // apply_rules returns the length of the rewritten candidate
    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    blake2b_ctx_t hash_ctx;

    blake2b_init   (&hash_ctx);
    blake2b_update (&hash_ctx, cand.i, cand.pw_len);
    blake2b_final  (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32 h0_hi = h32_from_64_S (hash_ctx.h[0]);
    const u32 h0_lo = l32_from_64_S (hash_ctx.h[0]);
    const u32 h1_hi = h32_from_64_S (hash_ctx.h[1]);
    const u32 h1_lo = l32_from_64_S (hash_ctx.h[1]);

    COMPARE_S_SCALAR (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
@ -11,117 +11,18 @@
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_blake2b.cl"
|
||||
#endif
|
||||
|
||||
typedef struct blake2
|
||||
{
|
||||
u64 h[8];
|
||||
u64 t[2];
|
||||
u64 f[2];
|
||||
u32 buflen;
|
||||
u32 outlen;
|
||||
|
||||
} blake2_t;
|
||||
|
||||
#define BLAKE2B_FINAL 1
|
||||
#define BLAKE2B_UPDATE 0
|
||||
|
||||
#define BLAKE2B_G(k0,k1,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[(k0)]; \
|
||||
d = hc_rotr64 (d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = hc_rotr64 (b ^ c, 24); \
|
||||
a = a + b + m[(k1)]; \
|
||||
d = hc_rotr64 (d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = hc_rotr64 (b ^ c, 63); \
|
||||
} while (0)
|
||||
|
||||
#define BLAKE2B_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
|
||||
do { \
|
||||
BLAKE2B_G ((c0),(c1),v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
BLAKE2B_G ((c2),(c3),v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
BLAKE2B_G ((c4),(c5),v[ 2],v[ 6],v[10],v[14]); \
|
||||
BLAKE2B_G ((c6),(c7),v[ 3],v[ 7],v[11],v[15]); \
|
||||
BLAKE2B_G ((c8),(c9),v[ 0],v[ 5],v[10],v[15]); \
|
||||
BLAKE2B_G ((ca),(cb),v[ 1],v[ 6],v[11],v[12]); \
|
||||
BLAKE2B_G ((cc),(cd),v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
BLAKE2B_G ((ce),(cf),v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while (0)
|
||||
|
||||
DECLSPEC void blake2b_transform (u64x *h, u64x *t, u64x *f, u64x *m, u64x *v, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x out_len, const u8 isFinal)
|
||||
{
|
||||
if (isFinal)
|
||||
f[0] = -1;
|
||||
|
||||
t[0] += hl32_to_64 (0, out_len);
|
||||
|
||||
m[ 0] = hl32_to_64 (w0[1], w0[0]);
|
||||
m[ 1] = hl32_to_64 (w0[3], w0[2]);
|
||||
m[ 2] = hl32_to_64 (w1[1], w1[0]);
|
||||
m[ 3] = hl32_to_64 (w1[3], w1[2]);
|
||||
m[ 4] = hl32_to_64 (w2[1], w2[0]);
|
||||
m[ 5] = hl32_to_64 (w2[3], w2[2]);
|
||||
m[ 6] = hl32_to_64 (w3[1], w3[0]);
|
||||
m[ 7] = hl32_to_64 (w3[3], w3[2]);
|
||||
m[ 8] = 0;
|
||||
m[ 9] = 0;
|
||||
m[10] = 0;
|
||||
m[11] = 0;
|
||||
m[12] = 0;
|
||||
m[13] = 0;
|
||||
m[14] = 0;
|
||||
m[15] = 0;
|
||||
|
||||
v[ 0] = h[0];
|
||||
v[ 1] = h[1];
|
||||
v[ 2] = h[2];
|
||||
v[ 3] = h[3];
|
||||
v[ 4] = h[4];
|
||||
v[ 5] = h[5];
|
||||
v[ 6] = h[6];
|
||||
v[ 7] = h[7];
|
||||
v[ 8] = BLAKE2B_IV_00;
|
||||
v[ 9] = BLAKE2B_IV_01;
|
||||
v[10] = BLAKE2B_IV_02;
|
||||
v[11] = BLAKE2B_IV_03;
|
||||
v[12] = BLAKE2B_IV_04 ^ t[0];
|
||||
v[13] = BLAKE2B_IV_05 ^ t[1];
|
||||
v[14] = BLAKE2B_IV_06 ^ f[0];
|
||||
v[15] = BLAKE2B_IV_07 ^ f[1];
|
||||
|
||||
BLAKE2B_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
BLAKE2B_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3);
|
||||
BLAKE2B_ROUND (11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4);
|
||||
BLAKE2B_ROUND ( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8);
|
||||
BLAKE2B_ROUND ( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13);
|
||||
BLAKE2B_ROUND ( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9);
|
||||
BLAKE2B_ROUND (12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11);
|
||||
BLAKE2B_ROUND (13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10);
|
||||
BLAKE2B_ROUND ( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5);
|
||||
BLAKE2B_ROUND (10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0);
|
||||
BLAKE2B_ROUND ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
BLAKE2B_ROUND (14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3);
|
||||
|
||||
h[0] = h[0] ^ v[0] ^ v[ 8];
|
||||
h[1] = h[1] ^ v[1] ^ v[ 9];
|
||||
h[2] = h[2] ^ v[2] ^ v[10];
|
||||
h[3] = h[3] ^ v[3] ^ v[11];
|
||||
h[4] = h[4] ^ v[4] ^ v[12];
|
||||
h[5] = h[5] ^ v[5] ^ v[13];
|
||||
h[6] = h[6] ^ v[6] ^ v[14];
|
||||
h[7] = h[7] ^ v[7] ^ v[15];
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_m04 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m04 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
@ -137,24 +38,6 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_ESALT (blake2_t))
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len & 63;
|
||||
|
||||
u64 tmp_h[8];
|
||||
u64 tmp_t[2];
|
||||
u64 tmp_f[2];
|
||||
|
||||
tmp_h[0] = esalt_bufs[digests_offset].h[0];
|
||||
tmp_h[1] = esalt_bufs[digests_offset].h[1];
|
||||
tmp_h[2] = esalt_bufs[digests_offset].h[2];
|
||||
tmp_h[3] = esalt_bufs[digests_offset].h[3];
|
||||
tmp_h[4] = esalt_bufs[digests_offset].h[4];
|
||||
tmp_h[5] = esalt_bufs[digests_offset].h[5];
|
||||
tmp_h[6] = esalt_bufs[digests_offset].h[6];
|
||||
tmp_h[7] = esalt_bufs[digests_offset].h[7];
|
||||
|
||||
tmp_t[0] = esalt_bufs[digests_offset].t[0];
|
||||
tmp_t[1] = esalt_bufs[digests_offset].t[1];
|
||||
tmp_f[0] = esalt_bufs[digests_offset].f[0];
|
||||
tmp_f[1] = esalt_bufs[digests_offset].f[1];
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
@ -228,64 +111,61 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_ESALT (blake2_t))
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u64x digest[8];
|
||||
u64x m[16];
|
||||
u64x v[16];
|
||||
|
||||
m[ 0] = hl32_to_64 (w0[1], w0[0]);
|
||||
m[ 1] = hl32_to_64 (w0[3], w0[2]);
|
||||
m[ 2] = hl32_to_64 (w1[1], w1[0]);
|
||||
m[ 3] = hl32_to_64 (w1[3], w1[2]);
|
||||
m[ 4] = hl32_to_64 (w2[1], w2[0]);
|
||||
m[ 5] = hl32_to_64 (w2[3], w2[2]);
|
||||
m[ 6] = hl32_to_64 (w3[1], w3[0]);
|
||||
m[ 7] = hl32_to_64 (w3[3], w3[2]);
|
||||
m[ 8] = 0;
|
||||
m[ 9] = 0;
|
||||
m[10] = 0;
|
||||
m[11] = 0;
|
||||
m[12] = 0;
|
||||
m[13] = 0;
|
||||
m[14] = 0;
|
||||
m[15] = 0;
|
||||
|
||||
u64x h[8];
|
||||
u64x t[2];
|
||||
u64x f[2];
|
||||
|
||||
h[0] = tmp_h[0];
|
||||
h[1] = tmp_h[1];
|
||||
h[2] = tmp_h[2];
|
||||
h[3] = tmp_h[3];
|
||||
h[4] = tmp_h[4];
|
||||
h[5] = tmp_h[5];
|
||||
h[6] = tmp_h[6];
|
||||
h[7] = tmp_h[7];
|
||||
h[0] = BLAKE2B_IV_00 ^ 0x01010040;
|
||||
h[1] = BLAKE2B_IV_01;
|
||||
h[2] = BLAKE2B_IV_02;
|
||||
h[3] = BLAKE2B_IV_03;
|
||||
h[4] = BLAKE2B_IV_04;
|
||||
h[5] = BLAKE2B_IV_05;
|
||||
h[6] = BLAKE2B_IV_06;
|
||||
h[7] = BLAKE2B_IV_07;
|
||||
|
||||
t[0] = tmp_t[0];
|
||||
t[1] = tmp_t[1];
|
||||
f[0] = tmp_f[0];
|
||||
f[1] = tmp_f[1];
|
||||
blake2b_transform_vector (h, m, out_len, BLAKE2B_FINAL);
|
||||
|
||||
blake2b_transform (h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL);
|
||||
|
||||
digest[0] = h[0];
|
||||
digest[1] = h[1];
|
||||
digest[2] = h[2];
|
||||
digest[3] = h[3];
|
||||
digest[4] = h[4];
|
||||
digest[5] = h[5];
|
||||
digest[6] = h[6];
|
||||
digest[7] = h[7];
|
||||
|
||||
const u32x r0 = h32_from_64 (digest[0]);
|
||||
const u32x r1 = l32_from_64 (digest[0]);
|
||||
const u32x r2 = h32_from_64 (digest[1]);
|
||||
const u32x r3 = l32_from_64 (digest[1]);
|
||||
const u32x r0 = h32_from_64 (h[0]);
|
||||
const u32x r1 = l32_from_64 (h[0]);
|
||||
const u32x r2 = h32_from_64 (h[1]);
|
||||
const u32x r3 = l32_from_64 (h[1]);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_m08 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m08 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_m16 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_m16 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s04 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s04 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
@ -304,24 +184,6 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_ESALT (blake2_t))
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len & 63;
|
||||
|
||||
u64 tmp_h[8];
|
||||
u64 tmp_t[2];
|
||||
u64 tmp_f[2];
|
||||
|
||||
tmp_h[0] = esalt_bufs[digests_offset].h[0];
|
||||
tmp_h[1] = esalt_bufs[digests_offset].h[1];
|
||||
tmp_h[2] = esalt_bufs[digests_offset].h[2];
|
||||
tmp_h[3] = esalt_bufs[digests_offset].h[3];
|
||||
tmp_h[4] = esalt_bufs[digests_offset].h[4];
|
||||
tmp_h[5] = esalt_bufs[digests_offset].h[5];
|
||||
tmp_h[6] = esalt_bufs[digests_offset].h[6];
|
||||
tmp_h[7] = esalt_bufs[digests_offset].h[7];
|
||||
|
||||
tmp_t[0] = esalt_bufs[digests_offset].t[0];
|
||||
tmp_t[1] = esalt_bufs[digests_offset].t[1];
|
||||
tmp_f[0] = esalt_bufs[digests_offset].f[0];
|
||||
tmp_f[1] = esalt_bufs[digests_offset].f[1];
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -407,52 +269,51 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_ESALT (blake2_t))
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u64x digest[8];
|
||||
u64x m[16];
|
||||
u64x v[16];
|
||||
|
||||
m[ 0] = hl32_to_64 (w0[1], w0[0]);
|
||||
m[ 1] = hl32_to_64 (w0[3], w0[2]);
|
||||
m[ 2] = hl32_to_64 (w1[1], w1[0]);
|
||||
m[ 3] = hl32_to_64 (w1[3], w1[2]);
|
||||
m[ 4] = hl32_to_64 (w2[1], w2[0]);
|
||||
m[ 5] = hl32_to_64 (w2[3], w2[2]);
|
||||
m[ 6] = hl32_to_64 (w3[1], w3[0]);
|
||||
m[ 7] = hl32_to_64 (w3[3], w3[2]);
|
||||
m[ 8] = 0;
|
||||
m[ 9] = 0;
|
||||
m[10] = 0;
|
||||
m[11] = 0;
|
||||
m[12] = 0;
|
||||
m[13] = 0;
|
||||
m[14] = 0;
|
||||
m[15] = 0;
|
||||
|
||||
u64x h[8];
|
||||
u64x t[2];
|
||||
u64x f[2];
|
||||
|
||||
h[0] = tmp_h[0];
|
||||
h[1] = tmp_h[1];
|
||||
h[2] = tmp_h[2];
|
||||
h[3] = tmp_h[3];
|
||||
h[4] = tmp_h[4];
|
||||
h[5] = tmp_h[5];
|
||||
h[6] = tmp_h[6];
|
||||
h[7] = tmp_h[7];
|
||||
h[0] = BLAKE2B_IV_00 ^ 0x01010040;
|
||||
h[1] = BLAKE2B_IV_01;
|
||||
h[2] = BLAKE2B_IV_02;
|
||||
h[3] = BLAKE2B_IV_03;
|
||||
h[4] = BLAKE2B_IV_04;
|
||||
h[5] = BLAKE2B_IV_05;
|
||||
h[6] = BLAKE2B_IV_06;
|
||||
h[7] = BLAKE2B_IV_07;
|
||||
|
||||
t[0] = tmp_t[0];
|
||||
t[1] = tmp_t[1];
|
||||
f[0] = tmp_f[0];
|
||||
f[1] = tmp_f[1];
|
||||
blake2b_transform_vector (h, m, out_len, BLAKE2B_FINAL);
|
||||
|
||||
blake2b_transform (h, t, f, m, v, w0, w1, w2, w3, out_len, BLAKE2B_FINAL);
|
||||
|
||||
digest[0] = h[0];
|
||||
digest[1] = h[1];
|
||||
digest[2] = h[2];
|
||||
digest[3] = h[3];
|
||||
digest[4] = h[4];
|
||||
digest[5] = h[5];
|
||||
digest[6] = h[6];
|
||||
digest[7] = h[7];
|
||||
|
||||
const u32x r0 = h32_from_64 (digest[0]);
|
||||
const u32x r1 = l32_from_64 (digest[0]);
|
||||
const u32x r2 = h32_from_64 (digest[1]);
|
||||
const u32x r3 = l32_from_64 (digest[1]);
|
||||
const u32x r0 = h32_from_64 (h[0]);
|
||||
const u32x r1 = l32_from_64 (h[0]);
|
||||
const u32x r2 = h32_from_64 (h[1]);
|
||||
const u32x r3 = l32_from_64 (h[1]);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s08 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s08 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
KERNEL_FQ void m00600_s16 (KERN_ATTR_ESALT (blake2_t))
|
||||
KERNEL_FQ void m00600_s16 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
109
OpenCL/m00600_a1-pure.cl
Normal file
109
OpenCL/m00600_a1-pure.cl
Normal file
@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_blake2b.cl"
|
||||
#endif
|
||||
|
||||
/**
 * m00600_mxx - combinator-style BLAKE2b kernel ending in COMPARE_M_SCALAR.
 *
 * The left word pws[gid] is absorbed once into a base context. For every
 * entry of combs_buf[0 .. il_cnt-1] that context is copied, the right word
 * is appended with blake2b_update_global, the hash is finalized, and the
 * first two 64-bit state words (split into four 32-bit words) are passed
 * to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m00600_mxx (KERN_ATTR_BASIC ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no left word to process
  if (gid >= gid_max)
  {
    return;
  }

  // hash state after the left word; shared by all iterations below
  blake2b_ctx_t left_ctx;

  blake2b_init (&left_ctx);

  blake2b_update_global (&left_ctx, pws[gid].i, pws[gid].pw_len);

  // NOTE(review): `gid` and `il_pos` keep their exact names because the
  // COMPARE_* macros are defined outside this file and may refer to them.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    // start from the state that already contains the left word
    blake2b_ctx_t hash_ctx = left_ctx;

    blake2b_update_global (&hash_ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    blake2b_final (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32 h0_hi = h32_from_64_S (hash_ctx.h[0]);
    const u32 h0_lo = l32_from_64_S (hash_ctx.h[0]);
    const u32 h1_hi = h32_from_64_S (hash_ctx.h[1]);
    const u32 h1_lo = l32_from_64_S (hash_ctx.h[1]);

    COMPARE_M_SCALAR (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
||||
|
||||
/**
 * m00600_sxx - combinator-style BLAKE2b kernel ending in COMPARE_S_SCALAR.
 *
 * The left word pws[gid] is absorbed once into a base context. For every
 * entry of combs_buf[0 .. il_cnt-1] that context is copied, the right word
 * is appended with blake2b_update_global, the hash is finalized, and the
 * first two 64-bit state words are split into four 32-bit words. The four
 * target words are loaded up front into `search` and the comparison is
 * done by COMPARE_S_SCALAR.
 */
KERNEL_FQ void m00600_sxx (KERN_ATTR_BASIC ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no left word to process
  if (gid >= gid_max)
  {
    return;
  }

  // Target digest words, picked via the DGST_R0..DGST_R3 positions.
  // NOTE(review): `search` is not read in this function body; it is
  // presumably consumed by COMPARE_S_SCALAR, so its name must not change.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  // hash state after the left word; shared by all iterations below
  blake2b_ctx_t left_ctx;

  blake2b_init (&left_ctx);

  blake2b_update_global (&left_ctx, pws[gid].i, pws[gid].pw_len);

  // `gid` and `il_pos` also keep their names for the benefit of the macro.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    // start from the state that already contains the left word
    blake2b_ctx_t hash_ctx = left_ctx;

    blake2b_update_global (&hash_ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    blake2b_final (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32 h0_hi = h32_from_64_S (hash_ctx.h[0]);
    const u32 h0_lo = l32_from_64_S (hash_ctx.h[0]);
    const u32 h1_hi = h32_from_64_S (hash_ctx.h[1]);
    const u32 h1_lo = l32_from_64_S (hash_ctx.h[1]);

    COMPARE_S_SCALAR (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
File diff suppressed because it is too large
Load Diff
131
OpenCL/m00600_a3-pure.cl
Normal file
131
OpenCL/m00600_a3-pure.cl
Normal file
@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_blake2b.cl"
|
||||
#endif
|
||||
|
||||
/**
 * m00600_mxx - vectorized BLAKE2b kernel ending in COMPARE_M_SIMD.
 *
 * The words of pws[gid] are loaded into a zero-initialized 64-entry buffer.
 * For each step of the inner loop the first word is rebuilt as the original
 * first word OR-ed with words_buf_r[il_pos / VECT_SIZE], the buffer is
 * hashed with blake2b_init_vector / blake2b_update_vector /
 * blake2b_final_vector, and the first two 64-bit state words (split into
 * four 32-bit words) are passed to COMPARE_M_SIMD.
 */
KERNEL_FQ void m00600_mxx (KERN_ATTR_VECTOR ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no base word to process
  if (gid >= gid_max)
  {
    return;
  }

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  // copy one 32-bit word per 4 bytes of password; the rest of w stays zero
  for (u32 byte_off = 0, word_idx = 0; byte_off < pw_len; byte_off += 4, word_idx += 1)
  {
    w[word_idx] = pws[gid].i[word_idx];
  }

  // first word as loaded above, before any per-iteration part is merged in
  u32x w0l = w[0];

  // NOTE(review): `gid` and `il_pos` keep their exact names because the
  // COMPARE_* macros are defined outside this file and may refer to them.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x right_part = words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0l | right_part;

    blake2b_ctx_vector_t hash_ctx;

    blake2b_init_vector   (&hash_ctx);
    blake2b_update_vector (&hash_ctx, w, pw_len);
    blake2b_final_vector  (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32x h0_hi = h32_from_64 (hash_ctx.h[0]);
    const u32x h0_lo = l32_from_64 (hash_ctx.h[0]);
    const u32x h1_hi = h32_from_64 (hash_ctx.h[1]);
    const u32x h1_lo = l32_from_64 (hash_ctx.h[1]);

    COMPARE_M_SIMD (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
||||
|
||||
/**
 * m00600_sxx - vectorized BLAKE2b kernel ending in COMPARE_S_SIMD.
 *
 * The words of pws[gid] are loaded into a zero-initialized 64-entry buffer.
 * For each step of the inner loop the first word is rebuilt as the original
 * first word OR-ed with words_buf_r[il_pos / VECT_SIZE], the buffer is
 * hashed with blake2b_init_vector / blake2b_update_vector /
 * blake2b_final_vector, and the first two 64-bit state words are split
 * into four 32-bit words. The four target words are loaded up front into
 * `search` and the comparison is done by COMPARE_S_SIMD.
 */
KERNEL_FQ void m00600_sxx (KERN_ATTR_VECTOR ())
{
  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no base word to process
  if (gid >= gid_max)
  {
    return;
  }

  // Target digest words, picked via the DGST_R0..DGST_R3 positions.
  // NOTE(review): `search` is not read in this function body; it is
  // presumably consumed by COMPARE_S_SIMD, so its name must not change.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  // copy one 32-bit word per 4 bytes of password; the rest of w stays zero
  for (u32 byte_off = 0, word_idx = 0; byte_off < pw_len; byte_off += 4, word_idx += 1)
  {
    w[word_idx] = pws[gid].i[word_idx];
  }

  // first word as loaded above, before any per-iteration part is merged in
  u32x w0l = w[0];

  // `gid` and `il_pos` also keep their names for the benefit of the macro.
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x right_part = words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0l | right_part;

    blake2b_ctx_vector_t hash_ctx;

    blake2b_init_vector   (&hash_ctx);
    blake2b_update_vector (&hash_ctx, w, pw_len);
    blake2b_final_vector  (&hash_ctx);

    // split h[0] and h[1] into (high, low) 32-bit halves
    const u32x h0_hi = h32_from_64 (hash_ctx.h[0]);
    const u32x h0_lo = l32_from_64 (hash_ctx.h[0]);
    const u32x h1_hi = h32_from_64 (hash_ctx.h[1]);
    const u32x h1_lo = l32_from_64 (hash_ctx.h[1]);

    COMPARE_S_SIMD (h0_hi, h0_lo, h1_hi, h1_lo);
  }
}
|
@ -1,5 +1,11 @@
|
||||
* changes v6.0.0 -> v6.0.x
|
||||
|
||||
##
|
||||
## Algorithms
|
||||
##
|
||||
|
||||
- Added pure kernels for hash-mode 600 (BLAKE2b-512)
|
||||
|
||||
##
|
||||
## Improvements
|
||||
##
|
||||
|
@ -42,31 +42,12 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig,
|
||||
const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; }
|
||||
const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; }
|
||||
|
||||
typedef struct blake2
|
||||
{
|
||||
u64 h[8];
|
||||
u64 t[2];
|
||||
u64 f[2];
|
||||
u32 buflen;
|
||||
u32 outlen;
|
||||
|
||||
} blake2_t;
|
||||
|
||||
static const char *SIGNATURE_BLAKE2B = "$BLAKE2$";
|
||||
|
||||
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u64 esalt_size = (const u64) sizeof (blake2_t);
|
||||
|
||||
return esalt_size;
|
||||
}
|
||||
|
||||
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
|
||||
{
|
||||
u64 *digest = (u64 *) digest_buf;
|
||||
|
||||
blake2_t *blake2 = (blake2_t *) esalt_buf;
|
||||
|
||||
token_t token;
|
||||
|
||||
token.token_cnt = 2;
|
||||
@ -97,24 +78,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
|
||||
digest[6] = hex_to_u64 (hash_pos + 96);
|
||||
digest[7] = hex_to_u64 (hash_pos + 112);
|
||||
|
||||
// Initialize BLAKE2 Params and State
|
||||
|
||||
memset (blake2, 0, sizeof (blake2_t));
|
||||
|
||||
blake2->h[0] = BLAKE2B_IV_00;
|
||||
blake2->h[1] = BLAKE2B_IV_01;
|
||||
blake2->h[2] = BLAKE2B_IV_02;
|
||||
blake2->h[3] = BLAKE2B_IV_03;
|
||||
blake2->h[4] = BLAKE2B_IV_04;
|
||||
blake2->h[5] = BLAKE2B_IV_05;
|
||||
blake2->h[6] = BLAKE2B_IV_06;
|
||||
blake2->h[7] = BLAKE2B_IV_07;
|
||||
|
||||
// blake2->h[0] ^= 0x0000000001010040; // digest_length = 0x40, depth = 0x01, fanout = 0x01
|
||||
blake2->h[0] ^= 0x40 << 0;
|
||||
blake2->h[0] ^= 0x01 << 16;
|
||||
blake2->h[0] ^= 0x01 << 24;
|
||||
|
||||
return (PARSER_OK);
|
||||
}
|
||||
|
||||
@ -161,7 +124,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_dgst_pos3 = module_dgst_pos3;
|
||||
module_ctx->module_dgst_size = module_dgst_size;
|
||||
module_ctx->module_dictstat_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_esalt_size = module_esalt_size;
|
||||
module_ctx->module_esalt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
|
||||
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
|
||||
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
|
||||
|
@ -10,7 +10,7 @@ use warnings;
|
||||
|
||||
use Digest::BLAKE2 qw (blake2b_hex);
|
||||
|
||||
sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [-1, -1], [-1, -1]] }
|
||||
sub module_constraints { [[0, 256], [-1, -1], [0, 55], [-1, -1], [-1, -1]] }
|
||||
|
||||
sub module_generate_hash
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user