From b6feddd81fdf7e76800a995b9a73716b7956d24b Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 4 Mar 2020 13:30:09 +0100 Subject: [PATCH] Unroll some of the code in the candidate generators --- OpenCL/amp_a1.cl | 69 +++++++++++++- OpenCL/markov_be.cl | 226 ++++++++++++++++++++++++++++++++++++++++---- OpenCL/markov_le.cl | 225 +++++++++++++++++++++++++++++++++++++++---- OpenCL/shared.cl | 81 ++++++++++++++-- 4 files changed, 548 insertions(+), 53 deletions(-) diff --git a/OpenCL/amp_a1.cl b/OpenCL/amp_a1.cl index 5ab241211..1b0ef4c33 100644 --- a/OpenCL/amp_a1.cl +++ b/OpenCL/amp_a1.cl @@ -34,11 +34,70 @@ KERNEL_FQ void amp (GLOBAL_AS pw_t *pws, GLOBAL_AS pw_t *pws_amp, GLOBAL_AS cons switch_buffer_by_offset_1x64_le_S (pw.i, comb_len); } - #pragma unroll - for (int i = 0; i < 64; i++) - { - pw.i[i] |= comb.i[i]; - } + pw.i[ 0] |= comb.i[ 0]; + pw.i[ 1] |= comb.i[ 1]; + pw.i[ 2] |= comb.i[ 2]; + pw.i[ 3] |= comb.i[ 3]; + pw.i[ 4] |= comb.i[ 4]; + pw.i[ 5] |= comb.i[ 5]; + pw.i[ 6] |= comb.i[ 6]; + pw.i[ 7] |= comb.i[ 7]; + pw.i[ 8] |= comb.i[ 8]; + pw.i[ 9] |= comb.i[ 9]; + pw.i[10] |= comb.i[10]; + pw.i[11] |= comb.i[11]; + pw.i[12] |= comb.i[12]; + pw.i[13] |= comb.i[13]; + pw.i[14] |= comb.i[14]; + pw.i[15] |= comb.i[15]; + pw.i[16] |= comb.i[16]; + pw.i[17] |= comb.i[17]; + pw.i[18] |= comb.i[18]; + pw.i[19] |= comb.i[19]; + pw.i[20] |= comb.i[20]; + pw.i[21] |= comb.i[21]; + pw.i[22] |= comb.i[22]; + pw.i[23] |= comb.i[23]; + pw.i[24] |= comb.i[24]; + pw.i[25] |= comb.i[25]; + pw.i[26] |= comb.i[26]; + pw.i[27] |= comb.i[27]; + pw.i[28] |= comb.i[28]; + pw.i[29] |= comb.i[29]; + pw.i[30] |= comb.i[30]; + pw.i[31] |= comb.i[31]; + pw.i[32] |= comb.i[32]; + pw.i[33] |= comb.i[33]; + pw.i[34] |= comb.i[34]; + pw.i[35] |= comb.i[35]; + pw.i[36] |= comb.i[36]; + pw.i[37] |= comb.i[37]; + pw.i[38] |= comb.i[38]; + pw.i[39] |= comb.i[39]; + pw.i[40] |= comb.i[40]; + pw.i[41] |= comb.i[41]; + pw.i[42] |= comb.i[42]; + pw.i[43] |= comb.i[43]; + pw.i[44] |= comb.i[44]; + pw.i[45] |= comb.i[45]; + pw.i[46] |= comb.i[46]; + pw.i[47] |= comb.i[47]; + pw.i[48] |= comb.i[48]; + pw.i[49] |= comb.i[49]; + pw.i[50] |= comb.i[50]; + pw.i[51] |= comb.i[51]; + pw.i[52] |= comb.i[52]; + pw.i[53] |= comb.i[53]; + pw.i[54] |= comb.i[54]; + pw.i[55] |= comb.i[55]; + pw.i[56] |= comb.i[56]; + pw.i[57] |= comb.i[57]; + pw.i[58] |= comb.i[58]; + pw.i[59] |= comb.i[59]; + pw.i[60] |= comb.i[60]; + pw.i[61] |= comb.i[61]; + pw.i[62] |= comb.i[62]; + pw.i[63] |= comb.i[63]; pw.pw_len = pw_len + comb_len; diff --git a/OpenCL/markov_be.cl b/OpenCL/markov_be.cl index a69e5e691..8463bcfe2 100644 --- a/OpenCL/markov_be.cl +++ b/OpenCL/markov_be.cl @@ -52,17 +52,79 @@ KERNEL_FQ void l_markov (GLOBAL_AS pw_t *pws_buf_l, GLOBAL_AS const cs_t *root_c if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); + pw_t pw; - #pragma unroll - for (int idx = 0; idx < 64; idx++) - { - pws_buf_l[gid].i[idx] = pw_buf[idx]; - } + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; + pw.pw_len = pw_l_len + pw_r_len; + + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); + + pws_buf_l[gid] = pw; } KERNEL_FQ void r_markov (GLOBAL_AS bf_t *pws_buf_r, GLOBAL_AS const cs_t *root_css_buf, GLOBAL_AS const cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u64 gid_max) @@ -71,11 +133,76 @@ KERNEL_FQ void r_markov (GLOBAL_AS bf_t *pws_buf_r, GLOBAL_AS const cs_t *root_c if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; + pw_t pw; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - pws_buf_r[gid].i = pw_buf[0]; + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); + + pws_buf_r[gid].i = pw.i[0]; } KERNEL_FQ void C_markov (GLOBAL_AS pw_t *pws_buf, GLOBAL_AS const cs_t *root_css_buf, GLOBAL_AS const cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u64 gid_max) @@ -84,15 +211,76 @@ KERNEL_FQ void C_markov (GLOBAL_AS pw_t *pws_buf, GLOBAL_AS const cs_t *root_css if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; + pw_t pw; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - #pragma unroll - for (int idx = 0; idx < 64; idx++) - { - pws_buf[gid].i[idx] = pw_buf[idx]; - } + pw.pw_len = pw_len; - pws_buf[gid].pw_len = pw_len; + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); + + pws_buf[gid] = pw; } diff --git a/OpenCL/markov_le.cl b/OpenCL/markov_le.cl index f1feb7819..530b8acc4 100644 --- a/OpenCL/markov_le.cl +++ b/OpenCL/markov_le.cl @@ -52,17 +52,78 @@ KERNEL_FQ void l_markov (GLOBAL_AS pw_t *pws_buf_l, GLOBAL_AS const cs_t *root_c if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; + pw_t pw; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - #pragma unroll - for (int idx = 0; idx < 64; idx++) - { - pws_buf_l[gid].i[idx] = pw_buf[idx]; - } + pw.pw_len = pw_l_len + pw_r_len; - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); + + pws_buf_l[gid] = pw; } KERNEL_FQ void r_markov (GLOBAL_AS bf_t *pws_buf_r, GLOBAL_AS const cs_t *root_css_buf, GLOBAL_AS const cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u64 gid_max) @@ -71,11 +132,76 @@ KERNEL_FQ void r_markov (GLOBAL_AS bf_t *pws_buf_r, GLOBAL_AS const cs_t *root_c if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; + pw_t pw; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - pws_buf_r[gid].i = pw_buf[0]; + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); + + pws_buf_r[gid].i = pw.i[0]; } KERNEL_FQ void C_markov (GLOBAL_AS pw_t *pws_buf, GLOBAL_AS const cs_t *root_css_buf, GLOBAL_AS const cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u64 gid_max) @@ -84,15 +210,76 @@ KERNEL_FQ void C_markov (GLOBAL_AS pw_t *pws_buf, GLOBAL_AS const cs_t *root_css if (gid >= gid_max) return; - u32 pw_buf[64] = { 0 }; + pw_t pw; - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; - #pragma unroll - for (int idx = 0; idx < 64; idx++) - { - pws_buf[gid].i[idx] = pw_buf[idx]; - } + pw.pw_len = pw_len; - pws_buf[gid].pw_len = pw_len; + generate_pw (pw.i, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); + + pws_buf[gid] = pw; } diff --git a/OpenCL/shared.cl b/OpenCL/shared.cl index 30df353ef..47c37c3c6 100644 --- a/OpenCL/shared.cl +++ b/OpenCL/shared.cl @@ -10,26 +10,87 @@ #include "inc_common.cl" #endif -DECLSPEC void gpu_decompress_entry (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_comp, pw_t *pw, const u64 gid) +DECLSPEC void gpu_decompress_entry (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_comp, pw_t *buf, const u64 gid) { const u32 off = pws_idx[gid].off; const u32 cnt = pws_idx[gid].cnt; const u32 len = pws_idx[gid].len; - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - pw->i[i] = 0; - } + pw_t pw; + + pw.i[ 0] = 0; + pw.i[ 1] = 0; + pw.i[ 2] = 0; + pw.i[ 3] = 0; + pw.i[ 4] = 0; + pw.i[ 5] = 0; + pw.i[ 6] = 0; + pw.i[ 7] = 0; + pw.i[ 8] = 0; + pw.i[ 9] = 0; + pw.i[10] = 0; + pw.i[11] = 0; + pw.i[12] = 0; + pw.i[13] = 0; + pw.i[14] = 0; + pw.i[15] = 0; + pw.i[16] = 0; + pw.i[17] = 0; + pw.i[18] = 0; + pw.i[19] = 0; + pw.i[20] = 0; + pw.i[21] = 0; + pw.i[22] = 0; + pw.i[23] = 0; + pw.i[24] = 0; + pw.i[25] = 0; + pw.i[26] = 0; + pw.i[27] = 0; + pw.i[28] = 0; + pw.i[29] = 0; + pw.i[30] = 0; + pw.i[31] = 0; + pw.i[32] = 0; + pw.i[33] = 0; + pw.i[34] = 0; + pw.i[35] = 0; + pw.i[36] = 0; + pw.i[37] = 0; + pw.i[38] = 0; + pw.i[39] = 0; + pw.i[40] = 0; + pw.i[41] = 0; + pw.i[42] = 0; + pw.i[43] = 0; + pw.i[44] = 0; + pw.i[45] = 0; + pw.i[46] = 0; + pw.i[47] = 0; + pw.i[48] = 0; + pw.i[49] = 0; + pw.i[50] = 0; + pw.i[51] = 0; + pw.i[52] = 0; + pw.i[53] = 0; + pw.i[54] = 0; + pw.i[55] = 0; + pw.i[56] = 0; + pw.i[57] = 0; + pw.i[58] = 0; + pw.i[59] = 0; + pw.i[60] = 0; + pw.i[61] = 0; + pw.i[62] = 0; + pw.i[63] = 0; + + pw.pw_len = len; for (u32 i = 0, j = off; i < cnt; i++, j++) { - pw->i[i] = pws_comp[j]; + pw.i[i] = pws_comp[j]; } - pw->pw_len = len; + *buf = pw; } KERNEL_FQ void gpu_decompress (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_comp, GLOBAL_AS pw_t *pws_buf, const u64 gid_max)