mirror of
https://github.com/hashcat/hashcat
synced 2024-12-01 20:18:12 +01:00
11105 lines
357 KiB
Common Lisp
11105 lines
357 KiB
Common Lisp
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
|
|
|
#include "inc_hash_constants.h"
|
|
#include "inc_vendor.cl"
|
|
#include "inc_types.cl"
|
|
|
|
inline void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
|
{
|
|
#if defined IS_AMD || defined IS_GENERIC
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset;
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = amd_bytealign_S (w[63], w[62], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[62], w[61], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[61], w[60], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[60], w[59], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[59], w[58], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 4] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 3] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 2] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 1] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 0] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 0] = w[ 1];
|
|
w[ 1] = w[ 2];
|
|
w[ 2] = w[ 3];
|
|
w[ 3] = w[ 4];
|
|
w[ 4] = w[ 5];
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = amd_bytealign_S (w[62], w[61], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[61], w[60], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[60], w[59], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[59], w[58], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 4] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 3] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 2] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 1] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 1] = w[ 2];
|
|
w[ 2] = w[ 3];
|
|
w[ 3] = w[ 4];
|
|
w[ 4] = w[ 5];
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = amd_bytealign_S (w[61], w[60], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[60], w[59], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[59], w[58], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 4] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 3] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 2] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 2] = w[ 3];
|
|
w[ 3] = w[ 4];
|
|
w[ 4] = w[ 5];
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = amd_bytealign_S (w[60], w[59], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[59], w[58], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 4] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 3] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 3] = w[ 4];
|
|
w[ 4] = w[ 5];
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = amd_bytealign_S (w[59], w[58], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 4] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 4] = w[ 5];
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = amd_bytealign_S (w[58], w[57], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 5] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 5] = w[ 6];
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = amd_bytealign_S (w[57], w[56], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 6] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 6] = w[ 7];
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = amd_bytealign_S (w[56], w[55], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 7] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 7] = w[ 8];
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = amd_bytealign_S (w[55], w[54], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 8] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 8] = w[ 9];
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = amd_bytealign_S (w[54], w[53], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[ 9] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[ 9] = w[10];
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = amd_bytealign_S (w[53], w[52], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[10] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[10] = w[11];
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = amd_bytealign_S (w[52], w[51], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[11] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[11] = w[12];
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = amd_bytealign_S (w[51], w[50], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[12] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[12] = w[13];
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = amd_bytealign_S (w[50], w[49], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[13] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[13] = w[14];
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = amd_bytealign_S (w[49], w[48], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[14] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[14] = w[15];
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = amd_bytealign_S (w[48], w[47], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[15] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[15] = w[16];
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = amd_bytealign_S (w[47], w[46], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[16] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[16] = w[17];
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = amd_bytealign_S (w[46], w[45], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[17] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[17] = w[18];
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = amd_bytealign_S (w[45], w[44], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[18] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[18] = w[19];
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = amd_bytealign_S (w[44], w[43], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[19] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[19] = w[20];
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = amd_bytealign_S (w[43], w[42], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[20] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[20] = w[21];
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = amd_bytealign_S (w[42], w[41], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[21] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[21] = w[22];
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = amd_bytealign_S (w[41], w[40], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[22] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[22] = w[23];
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = amd_bytealign_S (w[40], w[39], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[23] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[23] = w[24];
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = amd_bytealign_S (w[39], w[38], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[24] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[24] = w[25];
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = amd_bytealign_S (w[38], w[37], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[25] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[25] = w[26];
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = amd_bytealign_S (w[37], w[36], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[26] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[26] = w[27];
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = amd_bytealign_S (w[36], w[35], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[27] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[27] = w[28];
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = amd_bytealign_S (w[35], w[34], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[28] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[28] = w[29];
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = amd_bytealign_S (w[34], w[33], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[29] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[29] = w[30];
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = amd_bytealign_S (w[33], w[32], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[30] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[30] = w[31];
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = amd_bytealign_S (w[32], w[31], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[31] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[31] = w[32];
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = amd_bytealign_S (w[31], w[30], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[32] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[32] = w[33];
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = amd_bytealign_S (w[30], w[29], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[33] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[33] = w[34];
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = amd_bytealign_S (w[29], w[28], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[34] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[34] = w[35];
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = amd_bytealign_S (w[28], w[27], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[35] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[35] = w[36];
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = amd_bytealign_S (w[27], w[26], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[36] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[36] = w[37];
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = amd_bytealign_S (w[26], w[25], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[37] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[37] = w[38];
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = amd_bytealign_S (w[25], w[24], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[38] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[38] = w[39];
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = amd_bytealign_S (w[24], w[23], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[39] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[39] = w[40];
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = amd_bytealign_S (w[23], w[22], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[40] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[40] = w[41];
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = amd_bytealign_S (w[22], w[21], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[41] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[41] = w[42];
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = amd_bytealign_S (w[21], w[20], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[42] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[42] = w[43];
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = amd_bytealign_S (w[20], w[19], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[43] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[43] = w[44];
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = amd_bytealign_S (w[19], w[18], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[44] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[44] = w[45];
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = amd_bytealign_S (w[18], w[17], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[45] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[45] = w[46];
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = amd_bytealign_S (w[17], w[16], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[46] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[46] = w[47];
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = amd_bytealign_S (w[16], w[15], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[47] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[47] = w[48];
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = amd_bytealign_S (w[15], w[14], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[48] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[48] = w[49];
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = amd_bytealign_S (w[14], w[13], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[49] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[49] = w[50];
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = amd_bytealign_S (w[13], w[12], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[50] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[50] = w[51];
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = amd_bytealign_S (w[12], w[11], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[51] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[51] = w[52];
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = amd_bytealign_S (w[11], w[10], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[52] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[52] = w[53];
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[53] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[53] = w[54];
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[54] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[54] = w[55];
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[55] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[55] = w[56];
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[56] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[56] = w[57];
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[57] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[57] = w[58];
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[58] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[58] = w[59];
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[59] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[59] = w[60];
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[60] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[60] = w[61];
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[61] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[61] = w[62];
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
|
|
w[62] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[62] = w[63];
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w[63] = 0;
|
|
}
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#ifdef IS_NV
|
|
const int offset_minus_4 = 4 - (offset % 4);
|
|
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = __byte_perm_S (w[62], w[63], selector);
|
|
w[62] = __byte_perm_S (w[61], w[62], selector);
|
|
w[61] = __byte_perm_S (w[60], w[61], selector);
|
|
w[60] = __byte_perm_S (w[59], w[60], selector);
|
|
w[59] = __byte_perm_S (w[58], w[59], selector);
|
|
w[58] = __byte_perm_S (w[57], w[58], selector);
|
|
w[57] = __byte_perm_S (w[56], w[57], selector);
|
|
w[56] = __byte_perm_S (w[55], w[56], selector);
|
|
w[55] = __byte_perm_S (w[54], w[55], selector);
|
|
w[54] = __byte_perm_S (w[53], w[54], selector);
|
|
w[53] = __byte_perm_S (w[52], w[53], selector);
|
|
w[52] = __byte_perm_S (w[51], w[52], selector);
|
|
w[51] = __byte_perm_S (w[50], w[51], selector);
|
|
w[50] = __byte_perm_S (w[49], w[50], selector);
|
|
w[49] = __byte_perm_S (w[48], w[49], selector);
|
|
w[48] = __byte_perm_S (w[47], w[48], selector);
|
|
w[47] = __byte_perm_S (w[46], w[47], selector);
|
|
w[46] = __byte_perm_S (w[45], w[46], selector);
|
|
w[45] = __byte_perm_S (w[44], w[45], selector);
|
|
w[44] = __byte_perm_S (w[43], w[44], selector);
|
|
w[43] = __byte_perm_S (w[42], w[43], selector);
|
|
w[42] = __byte_perm_S (w[41], w[42], selector);
|
|
w[41] = __byte_perm_S (w[40], w[41], selector);
|
|
w[40] = __byte_perm_S (w[39], w[40], selector);
|
|
w[39] = __byte_perm_S (w[38], w[39], selector);
|
|
w[38] = __byte_perm_S (w[37], w[38], selector);
|
|
w[37] = __byte_perm_S (w[36], w[37], selector);
|
|
w[36] = __byte_perm_S (w[35], w[36], selector);
|
|
w[35] = __byte_perm_S (w[34], w[35], selector);
|
|
w[34] = __byte_perm_S (w[33], w[34], selector);
|
|
w[33] = __byte_perm_S (w[32], w[33], selector);
|
|
w[32] = __byte_perm_S (w[31], w[32], selector);
|
|
w[31] = __byte_perm_S (w[30], w[31], selector);
|
|
w[30] = __byte_perm_S (w[29], w[30], selector);
|
|
w[29] = __byte_perm_S (w[28], w[29], selector);
|
|
w[28] = __byte_perm_S (w[27], w[28], selector);
|
|
w[27] = __byte_perm_S (w[26], w[27], selector);
|
|
w[26] = __byte_perm_S (w[25], w[26], selector);
|
|
w[25] = __byte_perm_S (w[24], w[25], selector);
|
|
w[24] = __byte_perm_S (w[23], w[24], selector);
|
|
w[23] = __byte_perm_S (w[22], w[23], selector);
|
|
w[22] = __byte_perm_S (w[21], w[22], selector);
|
|
w[21] = __byte_perm_S (w[20], w[21], selector);
|
|
w[20] = __byte_perm_S (w[19], w[20], selector);
|
|
w[19] = __byte_perm_S (w[18], w[19], selector);
|
|
w[18] = __byte_perm_S (w[17], w[18], selector);
|
|
w[17] = __byte_perm_S (w[16], w[17], selector);
|
|
w[16] = __byte_perm_S (w[15], w[16], selector);
|
|
w[15] = __byte_perm_S (w[14], w[15], selector);
|
|
w[14] = __byte_perm_S (w[13], w[14], selector);
|
|
w[13] = __byte_perm_S (w[12], w[13], selector);
|
|
w[12] = __byte_perm_S (w[11], w[12], selector);
|
|
w[11] = __byte_perm_S (w[10], w[11], selector);
|
|
w[10] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[ 9] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[ 8] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 7] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 6] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 5] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 4] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 3] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 2] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 1] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 0] = __byte_perm_S ( 0, w[ 0], selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = __byte_perm_S (w[61], w[62], selector);
|
|
w[62] = __byte_perm_S (w[60], w[61], selector);
|
|
w[61] = __byte_perm_S (w[59], w[60], selector);
|
|
w[60] = __byte_perm_S (w[58], w[59], selector);
|
|
w[59] = __byte_perm_S (w[57], w[58], selector);
|
|
w[58] = __byte_perm_S (w[56], w[57], selector);
|
|
w[57] = __byte_perm_S (w[55], w[56], selector);
|
|
w[56] = __byte_perm_S (w[54], w[55], selector);
|
|
w[55] = __byte_perm_S (w[53], w[54], selector);
|
|
w[54] = __byte_perm_S (w[52], w[53], selector);
|
|
w[53] = __byte_perm_S (w[51], w[52], selector);
|
|
w[52] = __byte_perm_S (w[50], w[51], selector);
|
|
w[51] = __byte_perm_S (w[49], w[50], selector);
|
|
w[50] = __byte_perm_S (w[48], w[49], selector);
|
|
w[49] = __byte_perm_S (w[47], w[48], selector);
|
|
w[48] = __byte_perm_S (w[46], w[47], selector);
|
|
w[47] = __byte_perm_S (w[45], w[46], selector);
|
|
w[46] = __byte_perm_S (w[44], w[45], selector);
|
|
w[45] = __byte_perm_S (w[43], w[44], selector);
|
|
w[44] = __byte_perm_S (w[42], w[43], selector);
|
|
w[43] = __byte_perm_S (w[41], w[42], selector);
|
|
w[42] = __byte_perm_S (w[40], w[41], selector);
|
|
w[41] = __byte_perm_S (w[39], w[40], selector);
|
|
w[40] = __byte_perm_S (w[38], w[39], selector);
|
|
w[39] = __byte_perm_S (w[37], w[38], selector);
|
|
w[38] = __byte_perm_S (w[36], w[37], selector);
|
|
w[37] = __byte_perm_S (w[35], w[36], selector);
|
|
w[36] = __byte_perm_S (w[34], w[35], selector);
|
|
w[35] = __byte_perm_S (w[33], w[34], selector);
|
|
w[34] = __byte_perm_S (w[32], w[33], selector);
|
|
w[33] = __byte_perm_S (w[31], w[32], selector);
|
|
w[32] = __byte_perm_S (w[30], w[31], selector);
|
|
w[31] = __byte_perm_S (w[29], w[30], selector);
|
|
w[30] = __byte_perm_S (w[28], w[29], selector);
|
|
w[29] = __byte_perm_S (w[27], w[28], selector);
|
|
w[28] = __byte_perm_S (w[26], w[27], selector);
|
|
w[27] = __byte_perm_S (w[25], w[26], selector);
|
|
w[26] = __byte_perm_S (w[24], w[25], selector);
|
|
w[25] = __byte_perm_S (w[23], w[24], selector);
|
|
w[24] = __byte_perm_S (w[22], w[23], selector);
|
|
w[23] = __byte_perm_S (w[21], w[22], selector);
|
|
w[22] = __byte_perm_S (w[20], w[21], selector);
|
|
w[21] = __byte_perm_S (w[19], w[20], selector);
|
|
w[20] = __byte_perm_S (w[18], w[19], selector);
|
|
w[19] = __byte_perm_S (w[17], w[18], selector);
|
|
w[18] = __byte_perm_S (w[16], w[17], selector);
|
|
w[17] = __byte_perm_S (w[15], w[16], selector);
|
|
w[16] = __byte_perm_S (w[14], w[15], selector);
|
|
w[15] = __byte_perm_S (w[13], w[14], selector);
|
|
w[14] = __byte_perm_S (w[12], w[13], selector);
|
|
w[13] = __byte_perm_S (w[11], w[12], selector);
|
|
w[12] = __byte_perm_S (w[10], w[11], selector);
|
|
w[11] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[10] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[ 9] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 8] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 7] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 6] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 5] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 4] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 3] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 2] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 1] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = __byte_perm_S (w[60], w[61], selector);
|
|
w[62] = __byte_perm_S (w[59], w[60], selector);
|
|
w[61] = __byte_perm_S (w[58], w[59], selector);
|
|
w[60] = __byte_perm_S (w[57], w[58], selector);
|
|
w[59] = __byte_perm_S (w[56], w[57], selector);
|
|
w[58] = __byte_perm_S (w[55], w[56], selector);
|
|
w[57] = __byte_perm_S (w[54], w[55], selector);
|
|
w[56] = __byte_perm_S (w[53], w[54], selector);
|
|
w[55] = __byte_perm_S (w[52], w[53], selector);
|
|
w[54] = __byte_perm_S (w[51], w[52], selector);
|
|
w[53] = __byte_perm_S (w[50], w[51], selector);
|
|
w[52] = __byte_perm_S (w[49], w[50], selector);
|
|
w[51] = __byte_perm_S (w[48], w[49], selector);
|
|
w[50] = __byte_perm_S (w[47], w[48], selector);
|
|
w[49] = __byte_perm_S (w[46], w[47], selector);
|
|
w[48] = __byte_perm_S (w[45], w[46], selector);
|
|
w[47] = __byte_perm_S (w[44], w[45], selector);
|
|
w[46] = __byte_perm_S (w[43], w[44], selector);
|
|
w[45] = __byte_perm_S (w[42], w[43], selector);
|
|
w[44] = __byte_perm_S (w[41], w[42], selector);
|
|
w[43] = __byte_perm_S (w[40], w[41], selector);
|
|
w[42] = __byte_perm_S (w[39], w[40], selector);
|
|
w[41] = __byte_perm_S (w[38], w[39], selector);
|
|
w[40] = __byte_perm_S (w[37], w[38], selector);
|
|
w[39] = __byte_perm_S (w[36], w[37], selector);
|
|
w[38] = __byte_perm_S (w[35], w[36], selector);
|
|
w[37] = __byte_perm_S (w[34], w[35], selector);
|
|
w[36] = __byte_perm_S (w[33], w[34], selector);
|
|
w[35] = __byte_perm_S (w[32], w[33], selector);
|
|
w[34] = __byte_perm_S (w[31], w[32], selector);
|
|
w[33] = __byte_perm_S (w[30], w[31], selector);
|
|
w[32] = __byte_perm_S (w[29], w[30], selector);
|
|
w[31] = __byte_perm_S (w[28], w[29], selector);
|
|
w[30] = __byte_perm_S (w[27], w[28], selector);
|
|
w[29] = __byte_perm_S (w[26], w[27], selector);
|
|
w[28] = __byte_perm_S (w[25], w[26], selector);
|
|
w[27] = __byte_perm_S (w[24], w[25], selector);
|
|
w[26] = __byte_perm_S (w[23], w[24], selector);
|
|
w[25] = __byte_perm_S (w[22], w[23], selector);
|
|
w[24] = __byte_perm_S (w[21], w[22], selector);
|
|
w[23] = __byte_perm_S (w[20], w[21], selector);
|
|
w[22] = __byte_perm_S (w[19], w[20], selector);
|
|
w[21] = __byte_perm_S (w[18], w[19], selector);
|
|
w[20] = __byte_perm_S (w[17], w[18], selector);
|
|
w[19] = __byte_perm_S (w[16], w[17], selector);
|
|
w[18] = __byte_perm_S (w[15], w[16], selector);
|
|
w[17] = __byte_perm_S (w[14], w[15], selector);
|
|
w[16] = __byte_perm_S (w[13], w[14], selector);
|
|
w[15] = __byte_perm_S (w[12], w[13], selector);
|
|
w[14] = __byte_perm_S (w[11], w[12], selector);
|
|
w[13] = __byte_perm_S (w[10], w[11], selector);
|
|
w[12] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[11] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[10] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 9] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 8] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 7] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 6] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 5] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 4] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 3] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 2] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = __byte_perm_S (w[59], w[60], selector);
|
|
w[62] = __byte_perm_S (w[58], w[59], selector);
|
|
w[61] = __byte_perm_S (w[57], w[58], selector);
|
|
w[60] = __byte_perm_S (w[56], w[57], selector);
|
|
w[59] = __byte_perm_S (w[55], w[56], selector);
|
|
w[58] = __byte_perm_S (w[54], w[55], selector);
|
|
w[57] = __byte_perm_S (w[53], w[54], selector);
|
|
w[56] = __byte_perm_S (w[52], w[53], selector);
|
|
w[55] = __byte_perm_S (w[51], w[52], selector);
|
|
w[54] = __byte_perm_S (w[50], w[51], selector);
|
|
w[53] = __byte_perm_S (w[49], w[50], selector);
|
|
w[52] = __byte_perm_S (w[48], w[49], selector);
|
|
w[51] = __byte_perm_S (w[47], w[48], selector);
|
|
w[50] = __byte_perm_S (w[46], w[47], selector);
|
|
w[49] = __byte_perm_S (w[45], w[46], selector);
|
|
w[48] = __byte_perm_S (w[44], w[45], selector);
|
|
w[47] = __byte_perm_S (w[43], w[44], selector);
|
|
w[46] = __byte_perm_S (w[42], w[43], selector);
|
|
w[45] = __byte_perm_S (w[41], w[42], selector);
|
|
w[44] = __byte_perm_S (w[40], w[41], selector);
|
|
w[43] = __byte_perm_S (w[39], w[40], selector);
|
|
w[42] = __byte_perm_S (w[38], w[39], selector);
|
|
w[41] = __byte_perm_S (w[37], w[38], selector);
|
|
w[40] = __byte_perm_S (w[36], w[37], selector);
|
|
w[39] = __byte_perm_S (w[35], w[36], selector);
|
|
w[38] = __byte_perm_S (w[34], w[35], selector);
|
|
w[37] = __byte_perm_S (w[33], w[34], selector);
|
|
w[36] = __byte_perm_S (w[32], w[33], selector);
|
|
w[35] = __byte_perm_S (w[31], w[32], selector);
|
|
w[34] = __byte_perm_S (w[30], w[31], selector);
|
|
w[33] = __byte_perm_S (w[29], w[30], selector);
|
|
w[32] = __byte_perm_S (w[28], w[29], selector);
|
|
w[31] = __byte_perm_S (w[27], w[28], selector);
|
|
w[30] = __byte_perm_S (w[26], w[27], selector);
|
|
w[29] = __byte_perm_S (w[25], w[26], selector);
|
|
w[28] = __byte_perm_S (w[24], w[25], selector);
|
|
w[27] = __byte_perm_S (w[23], w[24], selector);
|
|
w[26] = __byte_perm_S (w[22], w[23], selector);
|
|
w[25] = __byte_perm_S (w[21], w[22], selector);
|
|
w[24] = __byte_perm_S (w[20], w[21], selector);
|
|
w[23] = __byte_perm_S (w[19], w[20], selector);
|
|
w[22] = __byte_perm_S (w[18], w[19], selector);
|
|
w[21] = __byte_perm_S (w[17], w[18], selector);
|
|
w[20] = __byte_perm_S (w[16], w[17], selector);
|
|
w[19] = __byte_perm_S (w[15], w[16], selector);
|
|
w[18] = __byte_perm_S (w[14], w[15], selector);
|
|
w[17] = __byte_perm_S (w[13], w[14], selector);
|
|
w[16] = __byte_perm_S (w[12], w[13], selector);
|
|
w[15] = __byte_perm_S (w[11], w[12], selector);
|
|
w[14] = __byte_perm_S (w[10], w[11], selector);
|
|
w[13] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[12] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[11] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[10] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 9] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 8] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 7] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 6] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 5] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 4] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 3] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = __byte_perm_S (w[58], w[59], selector);
|
|
w[62] = __byte_perm_S (w[57], w[58], selector);
|
|
w[61] = __byte_perm_S (w[56], w[57], selector);
|
|
w[60] = __byte_perm_S (w[55], w[56], selector);
|
|
w[59] = __byte_perm_S (w[54], w[55], selector);
|
|
w[58] = __byte_perm_S (w[53], w[54], selector);
|
|
w[57] = __byte_perm_S (w[52], w[53], selector);
|
|
w[56] = __byte_perm_S (w[51], w[52], selector);
|
|
w[55] = __byte_perm_S (w[50], w[51], selector);
|
|
w[54] = __byte_perm_S (w[49], w[50], selector);
|
|
w[53] = __byte_perm_S (w[48], w[49], selector);
|
|
w[52] = __byte_perm_S (w[47], w[48], selector);
|
|
w[51] = __byte_perm_S (w[46], w[47], selector);
|
|
w[50] = __byte_perm_S (w[45], w[46], selector);
|
|
w[49] = __byte_perm_S (w[44], w[45], selector);
|
|
w[48] = __byte_perm_S (w[43], w[44], selector);
|
|
w[47] = __byte_perm_S (w[42], w[43], selector);
|
|
w[46] = __byte_perm_S (w[41], w[42], selector);
|
|
w[45] = __byte_perm_S (w[40], w[41], selector);
|
|
w[44] = __byte_perm_S (w[39], w[40], selector);
|
|
w[43] = __byte_perm_S (w[38], w[39], selector);
|
|
w[42] = __byte_perm_S (w[37], w[38], selector);
|
|
w[41] = __byte_perm_S (w[36], w[37], selector);
|
|
w[40] = __byte_perm_S (w[35], w[36], selector);
|
|
w[39] = __byte_perm_S (w[34], w[35], selector);
|
|
w[38] = __byte_perm_S (w[33], w[34], selector);
|
|
w[37] = __byte_perm_S (w[32], w[33], selector);
|
|
w[36] = __byte_perm_S (w[31], w[32], selector);
|
|
w[35] = __byte_perm_S (w[30], w[31], selector);
|
|
w[34] = __byte_perm_S (w[29], w[30], selector);
|
|
w[33] = __byte_perm_S (w[28], w[29], selector);
|
|
w[32] = __byte_perm_S (w[27], w[28], selector);
|
|
w[31] = __byte_perm_S (w[26], w[27], selector);
|
|
w[30] = __byte_perm_S (w[25], w[26], selector);
|
|
w[29] = __byte_perm_S (w[24], w[25], selector);
|
|
w[28] = __byte_perm_S (w[23], w[24], selector);
|
|
w[27] = __byte_perm_S (w[22], w[23], selector);
|
|
w[26] = __byte_perm_S (w[21], w[22], selector);
|
|
w[25] = __byte_perm_S (w[20], w[21], selector);
|
|
w[24] = __byte_perm_S (w[19], w[20], selector);
|
|
w[23] = __byte_perm_S (w[18], w[19], selector);
|
|
w[22] = __byte_perm_S (w[17], w[18], selector);
|
|
w[21] = __byte_perm_S (w[16], w[17], selector);
|
|
w[20] = __byte_perm_S (w[15], w[16], selector);
|
|
w[19] = __byte_perm_S (w[14], w[15], selector);
|
|
w[18] = __byte_perm_S (w[13], w[14], selector);
|
|
w[17] = __byte_perm_S (w[12], w[13], selector);
|
|
w[16] = __byte_perm_S (w[11], w[12], selector);
|
|
w[15] = __byte_perm_S (w[10], w[11], selector);
|
|
w[14] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[13] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[12] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[11] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[10] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 9] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 8] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 7] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 6] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 5] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 4] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = __byte_perm_S (w[57], w[58], selector);
|
|
w[62] = __byte_perm_S (w[56], w[57], selector);
|
|
w[61] = __byte_perm_S (w[55], w[56], selector);
|
|
w[60] = __byte_perm_S (w[54], w[55], selector);
|
|
w[59] = __byte_perm_S (w[53], w[54], selector);
|
|
w[58] = __byte_perm_S (w[52], w[53], selector);
|
|
w[57] = __byte_perm_S (w[51], w[52], selector);
|
|
w[56] = __byte_perm_S (w[50], w[51], selector);
|
|
w[55] = __byte_perm_S (w[49], w[50], selector);
|
|
w[54] = __byte_perm_S (w[48], w[49], selector);
|
|
w[53] = __byte_perm_S (w[47], w[48], selector);
|
|
w[52] = __byte_perm_S (w[46], w[47], selector);
|
|
w[51] = __byte_perm_S (w[45], w[46], selector);
|
|
w[50] = __byte_perm_S (w[44], w[45], selector);
|
|
w[49] = __byte_perm_S (w[43], w[44], selector);
|
|
w[48] = __byte_perm_S (w[42], w[43], selector);
|
|
w[47] = __byte_perm_S (w[41], w[42], selector);
|
|
w[46] = __byte_perm_S (w[40], w[41], selector);
|
|
w[45] = __byte_perm_S (w[39], w[40], selector);
|
|
w[44] = __byte_perm_S (w[38], w[39], selector);
|
|
w[43] = __byte_perm_S (w[37], w[38], selector);
|
|
w[42] = __byte_perm_S (w[36], w[37], selector);
|
|
w[41] = __byte_perm_S (w[35], w[36], selector);
|
|
w[40] = __byte_perm_S (w[34], w[35], selector);
|
|
w[39] = __byte_perm_S (w[33], w[34], selector);
|
|
w[38] = __byte_perm_S (w[32], w[33], selector);
|
|
w[37] = __byte_perm_S (w[31], w[32], selector);
|
|
w[36] = __byte_perm_S (w[30], w[31], selector);
|
|
w[35] = __byte_perm_S (w[29], w[30], selector);
|
|
w[34] = __byte_perm_S (w[28], w[29], selector);
|
|
w[33] = __byte_perm_S (w[27], w[28], selector);
|
|
w[32] = __byte_perm_S (w[26], w[27], selector);
|
|
w[31] = __byte_perm_S (w[25], w[26], selector);
|
|
w[30] = __byte_perm_S (w[24], w[25], selector);
|
|
w[29] = __byte_perm_S (w[23], w[24], selector);
|
|
w[28] = __byte_perm_S (w[22], w[23], selector);
|
|
w[27] = __byte_perm_S (w[21], w[22], selector);
|
|
w[26] = __byte_perm_S (w[20], w[21], selector);
|
|
w[25] = __byte_perm_S (w[19], w[20], selector);
|
|
w[24] = __byte_perm_S (w[18], w[19], selector);
|
|
w[23] = __byte_perm_S (w[17], w[18], selector);
|
|
w[22] = __byte_perm_S (w[16], w[17], selector);
|
|
w[21] = __byte_perm_S (w[15], w[16], selector);
|
|
w[20] = __byte_perm_S (w[14], w[15], selector);
|
|
w[19] = __byte_perm_S (w[13], w[14], selector);
|
|
w[18] = __byte_perm_S (w[12], w[13], selector);
|
|
w[17] = __byte_perm_S (w[11], w[12], selector);
|
|
w[16] = __byte_perm_S (w[10], w[11], selector);
|
|
w[15] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[14] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[13] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[12] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[11] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[10] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 9] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 8] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 7] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 6] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 5] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = __byte_perm_S (w[56], w[57], selector);
|
|
w[62] = __byte_perm_S (w[55], w[56], selector);
|
|
w[61] = __byte_perm_S (w[54], w[55], selector);
|
|
w[60] = __byte_perm_S (w[53], w[54], selector);
|
|
w[59] = __byte_perm_S (w[52], w[53], selector);
|
|
w[58] = __byte_perm_S (w[51], w[52], selector);
|
|
w[57] = __byte_perm_S (w[50], w[51], selector);
|
|
w[56] = __byte_perm_S (w[49], w[50], selector);
|
|
w[55] = __byte_perm_S (w[48], w[49], selector);
|
|
w[54] = __byte_perm_S (w[47], w[48], selector);
|
|
w[53] = __byte_perm_S (w[46], w[47], selector);
|
|
w[52] = __byte_perm_S (w[45], w[46], selector);
|
|
w[51] = __byte_perm_S (w[44], w[45], selector);
|
|
w[50] = __byte_perm_S (w[43], w[44], selector);
|
|
w[49] = __byte_perm_S (w[42], w[43], selector);
|
|
w[48] = __byte_perm_S (w[41], w[42], selector);
|
|
w[47] = __byte_perm_S (w[40], w[41], selector);
|
|
w[46] = __byte_perm_S (w[39], w[40], selector);
|
|
w[45] = __byte_perm_S (w[38], w[39], selector);
|
|
w[44] = __byte_perm_S (w[37], w[38], selector);
|
|
w[43] = __byte_perm_S (w[36], w[37], selector);
|
|
w[42] = __byte_perm_S (w[35], w[36], selector);
|
|
w[41] = __byte_perm_S (w[34], w[35], selector);
|
|
w[40] = __byte_perm_S (w[33], w[34], selector);
|
|
w[39] = __byte_perm_S (w[32], w[33], selector);
|
|
w[38] = __byte_perm_S (w[31], w[32], selector);
|
|
w[37] = __byte_perm_S (w[30], w[31], selector);
|
|
w[36] = __byte_perm_S (w[29], w[30], selector);
|
|
w[35] = __byte_perm_S (w[28], w[29], selector);
|
|
w[34] = __byte_perm_S (w[27], w[28], selector);
|
|
w[33] = __byte_perm_S (w[26], w[27], selector);
|
|
w[32] = __byte_perm_S (w[25], w[26], selector);
|
|
w[31] = __byte_perm_S (w[24], w[25], selector);
|
|
w[30] = __byte_perm_S (w[23], w[24], selector);
|
|
w[29] = __byte_perm_S (w[22], w[23], selector);
|
|
w[28] = __byte_perm_S (w[21], w[22], selector);
|
|
w[27] = __byte_perm_S (w[20], w[21], selector);
|
|
w[26] = __byte_perm_S (w[19], w[20], selector);
|
|
w[25] = __byte_perm_S (w[18], w[19], selector);
|
|
w[24] = __byte_perm_S (w[17], w[18], selector);
|
|
w[23] = __byte_perm_S (w[16], w[17], selector);
|
|
w[22] = __byte_perm_S (w[15], w[16], selector);
|
|
w[21] = __byte_perm_S (w[14], w[15], selector);
|
|
w[20] = __byte_perm_S (w[13], w[14], selector);
|
|
w[19] = __byte_perm_S (w[12], w[13], selector);
|
|
w[18] = __byte_perm_S (w[11], w[12], selector);
|
|
w[17] = __byte_perm_S (w[10], w[11], selector);
|
|
w[16] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[15] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[14] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[13] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[12] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[11] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[10] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 9] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 8] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 7] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 6] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = __byte_perm_S (w[55], w[56], selector);
|
|
w[62] = __byte_perm_S (w[54], w[55], selector);
|
|
w[61] = __byte_perm_S (w[53], w[54], selector);
|
|
w[60] = __byte_perm_S (w[52], w[53], selector);
|
|
w[59] = __byte_perm_S (w[51], w[52], selector);
|
|
w[58] = __byte_perm_S (w[50], w[51], selector);
|
|
w[57] = __byte_perm_S (w[49], w[50], selector);
|
|
w[56] = __byte_perm_S (w[48], w[49], selector);
|
|
w[55] = __byte_perm_S (w[47], w[48], selector);
|
|
w[54] = __byte_perm_S (w[46], w[47], selector);
|
|
w[53] = __byte_perm_S (w[45], w[46], selector);
|
|
w[52] = __byte_perm_S (w[44], w[45], selector);
|
|
w[51] = __byte_perm_S (w[43], w[44], selector);
|
|
w[50] = __byte_perm_S (w[42], w[43], selector);
|
|
w[49] = __byte_perm_S (w[41], w[42], selector);
|
|
w[48] = __byte_perm_S (w[40], w[41], selector);
|
|
w[47] = __byte_perm_S (w[39], w[40], selector);
|
|
w[46] = __byte_perm_S (w[38], w[39], selector);
|
|
w[45] = __byte_perm_S (w[37], w[38], selector);
|
|
w[44] = __byte_perm_S (w[36], w[37], selector);
|
|
w[43] = __byte_perm_S (w[35], w[36], selector);
|
|
w[42] = __byte_perm_S (w[34], w[35], selector);
|
|
w[41] = __byte_perm_S (w[33], w[34], selector);
|
|
w[40] = __byte_perm_S (w[32], w[33], selector);
|
|
w[39] = __byte_perm_S (w[31], w[32], selector);
|
|
w[38] = __byte_perm_S (w[30], w[31], selector);
|
|
w[37] = __byte_perm_S (w[29], w[30], selector);
|
|
w[36] = __byte_perm_S (w[28], w[29], selector);
|
|
w[35] = __byte_perm_S (w[27], w[28], selector);
|
|
w[34] = __byte_perm_S (w[26], w[27], selector);
|
|
w[33] = __byte_perm_S (w[25], w[26], selector);
|
|
w[32] = __byte_perm_S (w[24], w[25], selector);
|
|
w[31] = __byte_perm_S (w[23], w[24], selector);
|
|
w[30] = __byte_perm_S (w[22], w[23], selector);
|
|
w[29] = __byte_perm_S (w[21], w[22], selector);
|
|
w[28] = __byte_perm_S (w[20], w[21], selector);
|
|
w[27] = __byte_perm_S (w[19], w[20], selector);
|
|
w[26] = __byte_perm_S (w[18], w[19], selector);
|
|
w[25] = __byte_perm_S (w[17], w[18], selector);
|
|
w[24] = __byte_perm_S (w[16], w[17], selector);
|
|
w[23] = __byte_perm_S (w[15], w[16], selector);
|
|
w[22] = __byte_perm_S (w[14], w[15], selector);
|
|
w[21] = __byte_perm_S (w[13], w[14], selector);
|
|
w[20] = __byte_perm_S (w[12], w[13], selector);
|
|
w[19] = __byte_perm_S (w[11], w[12], selector);
|
|
w[18] = __byte_perm_S (w[10], w[11], selector);
|
|
w[17] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[16] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[15] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[14] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[13] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[12] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[11] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[10] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 9] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 8] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 7] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = __byte_perm_S (w[54], w[55], selector);
|
|
w[62] = __byte_perm_S (w[53], w[54], selector);
|
|
w[61] = __byte_perm_S (w[52], w[53], selector);
|
|
w[60] = __byte_perm_S (w[51], w[52], selector);
|
|
w[59] = __byte_perm_S (w[50], w[51], selector);
|
|
w[58] = __byte_perm_S (w[49], w[50], selector);
|
|
w[57] = __byte_perm_S (w[48], w[49], selector);
|
|
w[56] = __byte_perm_S (w[47], w[48], selector);
|
|
w[55] = __byte_perm_S (w[46], w[47], selector);
|
|
w[54] = __byte_perm_S (w[45], w[46], selector);
|
|
w[53] = __byte_perm_S (w[44], w[45], selector);
|
|
w[52] = __byte_perm_S (w[43], w[44], selector);
|
|
w[51] = __byte_perm_S (w[42], w[43], selector);
|
|
w[50] = __byte_perm_S (w[41], w[42], selector);
|
|
w[49] = __byte_perm_S (w[40], w[41], selector);
|
|
w[48] = __byte_perm_S (w[39], w[40], selector);
|
|
w[47] = __byte_perm_S (w[38], w[39], selector);
|
|
w[46] = __byte_perm_S (w[37], w[38], selector);
|
|
w[45] = __byte_perm_S (w[36], w[37], selector);
|
|
w[44] = __byte_perm_S (w[35], w[36], selector);
|
|
w[43] = __byte_perm_S (w[34], w[35], selector);
|
|
w[42] = __byte_perm_S (w[33], w[34], selector);
|
|
w[41] = __byte_perm_S (w[32], w[33], selector);
|
|
w[40] = __byte_perm_S (w[31], w[32], selector);
|
|
w[39] = __byte_perm_S (w[30], w[31], selector);
|
|
w[38] = __byte_perm_S (w[29], w[30], selector);
|
|
w[37] = __byte_perm_S (w[28], w[29], selector);
|
|
w[36] = __byte_perm_S (w[27], w[28], selector);
|
|
w[35] = __byte_perm_S (w[26], w[27], selector);
|
|
w[34] = __byte_perm_S (w[25], w[26], selector);
|
|
w[33] = __byte_perm_S (w[24], w[25], selector);
|
|
w[32] = __byte_perm_S (w[23], w[24], selector);
|
|
w[31] = __byte_perm_S (w[22], w[23], selector);
|
|
w[30] = __byte_perm_S (w[21], w[22], selector);
|
|
w[29] = __byte_perm_S (w[20], w[21], selector);
|
|
w[28] = __byte_perm_S (w[19], w[20], selector);
|
|
w[27] = __byte_perm_S (w[18], w[19], selector);
|
|
w[26] = __byte_perm_S (w[17], w[18], selector);
|
|
w[25] = __byte_perm_S (w[16], w[17], selector);
|
|
w[24] = __byte_perm_S (w[15], w[16], selector);
|
|
w[23] = __byte_perm_S (w[14], w[15], selector);
|
|
w[22] = __byte_perm_S (w[13], w[14], selector);
|
|
w[21] = __byte_perm_S (w[12], w[13], selector);
|
|
w[20] = __byte_perm_S (w[11], w[12], selector);
|
|
w[19] = __byte_perm_S (w[10], w[11], selector);
|
|
w[18] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[17] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[16] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[15] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[14] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[13] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[12] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[11] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[10] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 9] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 8] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = __byte_perm_S (w[53], w[54], selector);
|
|
w[62] = __byte_perm_S (w[52], w[53], selector);
|
|
w[61] = __byte_perm_S (w[51], w[52], selector);
|
|
w[60] = __byte_perm_S (w[50], w[51], selector);
|
|
w[59] = __byte_perm_S (w[49], w[50], selector);
|
|
w[58] = __byte_perm_S (w[48], w[49], selector);
|
|
w[57] = __byte_perm_S (w[47], w[48], selector);
|
|
w[56] = __byte_perm_S (w[46], w[47], selector);
|
|
w[55] = __byte_perm_S (w[45], w[46], selector);
|
|
w[54] = __byte_perm_S (w[44], w[45], selector);
|
|
w[53] = __byte_perm_S (w[43], w[44], selector);
|
|
w[52] = __byte_perm_S (w[42], w[43], selector);
|
|
w[51] = __byte_perm_S (w[41], w[42], selector);
|
|
w[50] = __byte_perm_S (w[40], w[41], selector);
|
|
w[49] = __byte_perm_S (w[39], w[40], selector);
|
|
w[48] = __byte_perm_S (w[38], w[39], selector);
|
|
w[47] = __byte_perm_S (w[37], w[38], selector);
|
|
w[46] = __byte_perm_S (w[36], w[37], selector);
|
|
w[45] = __byte_perm_S (w[35], w[36], selector);
|
|
w[44] = __byte_perm_S (w[34], w[35], selector);
|
|
w[43] = __byte_perm_S (w[33], w[34], selector);
|
|
w[42] = __byte_perm_S (w[32], w[33], selector);
|
|
w[41] = __byte_perm_S (w[31], w[32], selector);
|
|
w[40] = __byte_perm_S (w[30], w[31], selector);
|
|
w[39] = __byte_perm_S (w[29], w[30], selector);
|
|
w[38] = __byte_perm_S (w[28], w[29], selector);
|
|
w[37] = __byte_perm_S (w[27], w[28], selector);
|
|
w[36] = __byte_perm_S (w[26], w[27], selector);
|
|
w[35] = __byte_perm_S (w[25], w[26], selector);
|
|
w[34] = __byte_perm_S (w[24], w[25], selector);
|
|
w[33] = __byte_perm_S (w[23], w[24], selector);
|
|
w[32] = __byte_perm_S (w[22], w[23], selector);
|
|
w[31] = __byte_perm_S (w[21], w[22], selector);
|
|
w[30] = __byte_perm_S (w[20], w[21], selector);
|
|
w[29] = __byte_perm_S (w[19], w[20], selector);
|
|
w[28] = __byte_perm_S (w[18], w[19], selector);
|
|
w[27] = __byte_perm_S (w[17], w[18], selector);
|
|
w[26] = __byte_perm_S (w[16], w[17], selector);
|
|
w[25] = __byte_perm_S (w[15], w[16], selector);
|
|
w[24] = __byte_perm_S (w[14], w[15], selector);
|
|
w[23] = __byte_perm_S (w[13], w[14], selector);
|
|
w[22] = __byte_perm_S (w[12], w[13], selector);
|
|
w[21] = __byte_perm_S (w[11], w[12], selector);
|
|
w[20] = __byte_perm_S (w[10], w[11], selector);
|
|
w[19] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[18] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[17] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[16] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[15] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[14] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[13] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[12] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[11] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[10] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 9] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = __byte_perm_S (w[52], w[53], selector);
|
|
w[62] = __byte_perm_S (w[51], w[52], selector);
|
|
w[61] = __byte_perm_S (w[50], w[51], selector);
|
|
w[60] = __byte_perm_S (w[49], w[50], selector);
|
|
w[59] = __byte_perm_S (w[48], w[49], selector);
|
|
w[58] = __byte_perm_S (w[47], w[48], selector);
|
|
w[57] = __byte_perm_S (w[46], w[47], selector);
|
|
w[56] = __byte_perm_S (w[45], w[46], selector);
|
|
w[55] = __byte_perm_S (w[44], w[45], selector);
|
|
w[54] = __byte_perm_S (w[43], w[44], selector);
|
|
w[53] = __byte_perm_S (w[42], w[43], selector);
|
|
w[52] = __byte_perm_S (w[41], w[42], selector);
|
|
w[51] = __byte_perm_S (w[40], w[41], selector);
|
|
w[50] = __byte_perm_S (w[39], w[40], selector);
|
|
w[49] = __byte_perm_S (w[38], w[39], selector);
|
|
w[48] = __byte_perm_S (w[37], w[38], selector);
|
|
w[47] = __byte_perm_S (w[36], w[37], selector);
|
|
w[46] = __byte_perm_S (w[35], w[36], selector);
|
|
w[45] = __byte_perm_S (w[34], w[35], selector);
|
|
w[44] = __byte_perm_S (w[33], w[34], selector);
|
|
w[43] = __byte_perm_S (w[32], w[33], selector);
|
|
w[42] = __byte_perm_S (w[31], w[32], selector);
|
|
w[41] = __byte_perm_S (w[30], w[31], selector);
|
|
w[40] = __byte_perm_S (w[29], w[30], selector);
|
|
w[39] = __byte_perm_S (w[28], w[29], selector);
|
|
w[38] = __byte_perm_S (w[27], w[28], selector);
|
|
w[37] = __byte_perm_S (w[26], w[27], selector);
|
|
w[36] = __byte_perm_S (w[25], w[26], selector);
|
|
w[35] = __byte_perm_S (w[24], w[25], selector);
|
|
w[34] = __byte_perm_S (w[23], w[24], selector);
|
|
w[33] = __byte_perm_S (w[22], w[23], selector);
|
|
w[32] = __byte_perm_S (w[21], w[22], selector);
|
|
w[31] = __byte_perm_S (w[20], w[21], selector);
|
|
w[30] = __byte_perm_S (w[19], w[20], selector);
|
|
w[29] = __byte_perm_S (w[18], w[19], selector);
|
|
w[28] = __byte_perm_S (w[17], w[18], selector);
|
|
w[27] = __byte_perm_S (w[16], w[17], selector);
|
|
w[26] = __byte_perm_S (w[15], w[16], selector);
|
|
w[25] = __byte_perm_S (w[14], w[15], selector);
|
|
w[24] = __byte_perm_S (w[13], w[14], selector);
|
|
w[23] = __byte_perm_S (w[12], w[13], selector);
|
|
w[22] = __byte_perm_S (w[11], w[12], selector);
|
|
w[21] = __byte_perm_S (w[10], w[11], selector);
|
|
w[20] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[19] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[18] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[17] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[16] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[15] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[14] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[13] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[12] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[11] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[10] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = __byte_perm_S (w[51], w[52], selector);
|
|
w[62] = __byte_perm_S (w[50], w[51], selector);
|
|
w[61] = __byte_perm_S (w[49], w[50], selector);
|
|
w[60] = __byte_perm_S (w[48], w[49], selector);
|
|
w[59] = __byte_perm_S (w[47], w[48], selector);
|
|
w[58] = __byte_perm_S (w[46], w[47], selector);
|
|
w[57] = __byte_perm_S (w[45], w[46], selector);
|
|
w[56] = __byte_perm_S (w[44], w[45], selector);
|
|
w[55] = __byte_perm_S (w[43], w[44], selector);
|
|
w[54] = __byte_perm_S (w[42], w[43], selector);
|
|
w[53] = __byte_perm_S (w[41], w[42], selector);
|
|
w[52] = __byte_perm_S (w[40], w[41], selector);
|
|
w[51] = __byte_perm_S (w[39], w[40], selector);
|
|
w[50] = __byte_perm_S (w[38], w[39], selector);
|
|
w[49] = __byte_perm_S (w[37], w[38], selector);
|
|
w[48] = __byte_perm_S (w[36], w[37], selector);
|
|
w[47] = __byte_perm_S (w[35], w[36], selector);
|
|
w[46] = __byte_perm_S (w[34], w[35], selector);
|
|
w[45] = __byte_perm_S (w[33], w[34], selector);
|
|
w[44] = __byte_perm_S (w[32], w[33], selector);
|
|
w[43] = __byte_perm_S (w[31], w[32], selector);
|
|
w[42] = __byte_perm_S (w[30], w[31], selector);
|
|
w[41] = __byte_perm_S (w[29], w[30], selector);
|
|
w[40] = __byte_perm_S (w[28], w[29], selector);
|
|
w[39] = __byte_perm_S (w[27], w[28], selector);
|
|
w[38] = __byte_perm_S (w[26], w[27], selector);
|
|
w[37] = __byte_perm_S (w[25], w[26], selector);
|
|
w[36] = __byte_perm_S (w[24], w[25], selector);
|
|
w[35] = __byte_perm_S (w[23], w[24], selector);
|
|
w[34] = __byte_perm_S (w[22], w[23], selector);
|
|
w[33] = __byte_perm_S (w[21], w[22], selector);
|
|
w[32] = __byte_perm_S (w[20], w[21], selector);
|
|
w[31] = __byte_perm_S (w[19], w[20], selector);
|
|
w[30] = __byte_perm_S (w[18], w[19], selector);
|
|
w[29] = __byte_perm_S (w[17], w[18], selector);
|
|
w[28] = __byte_perm_S (w[16], w[17], selector);
|
|
w[27] = __byte_perm_S (w[15], w[16], selector);
|
|
w[26] = __byte_perm_S (w[14], w[15], selector);
|
|
w[25] = __byte_perm_S (w[13], w[14], selector);
|
|
w[24] = __byte_perm_S (w[12], w[13], selector);
|
|
w[23] = __byte_perm_S (w[11], w[12], selector);
|
|
w[22] = __byte_perm_S (w[10], w[11], selector);
|
|
w[21] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[20] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[19] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[18] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[17] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[16] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[15] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[14] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[13] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[12] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[11] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = __byte_perm_S (w[50], w[51], selector);
|
|
w[62] = __byte_perm_S (w[49], w[50], selector);
|
|
w[61] = __byte_perm_S (w[48], w[49], selector);
|
|
w[60] = __byte_perm_S (w[47], w[48], selector);
|
|
w[59] = __byte_perm_S (w[46], w[47], selector);
|
|
w[58] = __byte_perm_S (w[45], w[46], selector);
|
|
w[57] = __byte_perm_S (w[44], w[45], selector);
|
|
w[56] = __byte_perm_S (w[43], w[44], selector);
|
|
w[55] = __byte_perm_S (w[42], w[43], selector);
|
|
w[54] = __byte_perm_S (w[41], w[42], selector);
|
|
w[53] = __byte_perm_S (w[40], w[41], selector);
|
|
w[52] = __byte_perm_S (w[39], w[40], selector);
|
|
w[51] = __byte_perm_S (w[38], w[39], selector);
|
|
w[50] = __byte_perm_S (w[37], w[38], selector);
|
|
w[49] = __byte_perm_S (w[36], w[37], selector);
|
|
w[48] = __byte_perm_S (w[35], w[36], selector);
|
|
w[47] = __byte_perm_S (w[34], w[35], selector);
|
|
w[46] = __byte_perm_S (w[33], w[34], selector);
|
|
w[45] = __byte_perm_S (w[32], w[33], selector);
|
|
w[44] = __byte_perm_S (w[31], w[32], selector);
|
|
w[43] = __byte_perm_S (w[30], w[31], selector);
|
|
w[42] = __byte_perm_S (w[29], w[30], selector);
|
|
w[41] = __byte_perm_S (w[28], w[29], selector);
|
|
w[40] = __byte_perm_S (w[27], w[28], selector);
|
|
w[39] = __byte_perm_S (w[26], w[27], selector);
|
|
w[38] = __byte_perm_S (w[25], w[26], selector);
|
|
w[37] = __byte_perm_S (w[24], w[25], selector);
|
|
w[36] = __byte_perm_S (w[23], w[24], selector);
|
|
w[35] = __byte_perm_S (w[22], w[23], selector);
|
|
w[34] = __byte_perm_S (w[21], w[22], selector);
|
|
w[33] = __byte_perm_S (w[20], w[21], selector);
|
|
w[32] = __byte_perm_S (w[19], w[20], selector);
|
|
w[31] = __byte_perm_S (w[18], w[19], selector);
|
|
w[30] = __byte_perm_S (w[17], w[18], selector);
|
|
w[29] = __byte_perm_S (w[16], w[17], selector);
|
|
w[28] = __byte_perm_S (w[15], w[16], selector);
|
|
w[27] = __byte_perm_S (w[14], w[15], selector);
|
|
w[26] = __byte_perm_S (w[13], w[14], selector);
|
|
w[25] = __byte_perm_S (w[12], w[13], selector);
|
|
w[24] = __byte_perm_S (w[11], w[12], selector);
|
|
w[23] = __byte_perm_S (w[10], w[11], selector);
|
|
w[22] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[21] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[20] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[19] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[18] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[17] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[16] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[15] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[14] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[13] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[12] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = __byte_perm_S (w[49], w[50], selector);
|
|
w[62] = __byte_perm_S (w[48], w[49], selector);
|
|
w[61] = __byte_perm_S (w[47], w[48], selector);
|
|
w[60] = __byte_perm_S (w[46], w[47], selector);
|
|
w[59] = __byte_perm_S (w[45], w[46], selector);
|
|
w[58] = __byte_perm_S (w[44], w[45], selector);
|
|
w[57] = __byte_perm_S (w[43], w[44], selector);
|
|
w[56] = __byte_perm_S (w[42], w[43], selector);
|
|
w[55] = __byte_perm_S (w[41], w[42], selector);
|
|
w[54] = __byte_perm_S (w[40], w[41], selector);
|
|
w[53] = __byte_perm_S (w[39], w[40], selector);
|
|
w[52] = __byte_perm_S (w[38], w[39], selector);
|
|
w[51] = __byte_perm_S (w[37], w[38], selector);
|
|
w[50] = __byte_perm_S (w[36], w[37], selector);
|
|
w[49] = __byte_perm_S (w[35], w[36], selector);
|
|
w[48] = __byte_perm_S (w[34], w[35], selector);
|
|
w[47] = __byte_perm_S (w[33], w[34], selector);
|
|
w[46] = __byte_perm_S (w[32], w[33], selector);
|
|
w[45] = __byte_perm_S (w[31], w[32], selector);
|
|
w[44] = __byte_perm_S (w[30], w[31], selector);
|
|
w[43] = __byte_perm_S (w[29], w[30], selector);
|
|
w[42] = __byte_perm_S (w[28], w[29], selector);
|
|
w[41] = __byte_perm_S (w[27], w[28], selector);
|
|
w[40] = __byte_perm_S (w[26], w[27], selector);
|
|
w[39] = __byte_perm_S (w[25], w[26], selector);
|
|
w[38] = __byte_perm_S (w[24], w[25], selector);
|
|
w[37] = __byte_perm_S (w[23], w[24], selector);
|
|
w[36] = __byte_perm_S (w[22], w[23], selector);
|
|
w[35] = __byte_perm_S (w[21], w[22], selector);
|
|
w[34] = __byte_perm_S (w[20], w[21], selector);
|
|
w[33] = __byte_perm_S (w[19], w[20], selector);
|
|
w[32] = __byte_perm_S (w[18], w[19], selector);
|
|
w[31] = __byte_perm_S (w[17], w[18], selector);
|
|
w[30] = __byte_perm_S (w[16], w[17], selector);
|
|
w[29] = __byte_perm_S (w[15], w[16], selector);
|
|
w[28] = __byte_perm_S (w[14], w[15], selector);
|
|
w[27] = __byte_perm_S (w[13], w[14], selector);
|
|
w[26] = __byte_perm_S (w[12], w[13], selector);
|
|
w[25] = __byte_perm_S (w[11], w[12], selector);
|
|
w[24] = __byte_perm_S (w[10], w[11], selector);
|
|
w[23] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[22] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[21] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[20] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[19] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[18] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[17] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[16] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[15] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[14] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[13] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = __byte_perm_S (w[48], w[49], selector);
|
|
w[62] = __byte_perm_S (w[47], w[48], selector);
|
|
w[61] = __byte_perm_S (w[46], w[47], selector);
|
|
w[60] = __byte_perm_S (w[45], w[46], selector);
|
|
w[59] = __byte_perm_S (w[44], w[45], selector);
|
|
w[58] = __byte_perm_S (w[43], w[44], selector);
|
|
w[57] = __byte_perm_S (w[42], w[43], selector);
|
|
w[56] = __byte_perm_S (w[41], w[42], selector);
|
|
w[55] = __byte_perm_S (w[40], w[41], selector);
|
|
w[54] = __byte_perm_S (w[39], w[40], selector);
|
|
w[53] = __byte_perm_S (w[38], w[39], selector);
|
|
w[52] = __byte_perm_S (w[37], w[38], selector);
|
|
w[51] = __byte_perm_S (w[36], w[37], selector);
|
|
w[50] = __byte_perm_S (w[35], w[36], selector);
|
|
w[49] = __byte_perm_S (w[34], w[35], selector);
|
|
w[48] = __byte_perm_S (w[33], w[34], selector);
|
|
w[47] = __byte_perm_S (w[32], w[33], selector);
|
|
w[46] = __byte_perm_S (w[31], w[32], selector);
|
|
w[45] = __byte_perm_S (w[30], w[31], selector);
|
|
w[44] = __byte_perm_S (w[29], w[30], selector);
|
|
w[43] = __byte_perm_S (w[28], w[29], selector);
|
|
w[42] = __byte_perm_S (w[27], w[28], selector);
|
|
w[41] = __byte_perm_S (w[26], w[27], selector);
|
|
w[40] = __byte_perm_S (w[25], w[26], selector);
|
|
w[39] = __byte_perm_S (w[24], w[25], selector);
|
|
w[38] = __byte_perm_S (w[23], w[24], selector);
|
|
w[37] = __byte_perm_S (w[22], w[23], selector);
|
|
w[36] = __byte_perm_S (w[21], w[22], selector);
|
|
w[35] = __byte_perm_S (w[20], w[21], selector);
|
|
w[34] = __byte_perm_S (w[19], w[20], selector);
|
|
w[33] = __byte_perm_S (w[18], w[19], selector);
|
|
w[32] = __byte_perm_S (w[17], w[18], selector);
|
|
w[31] = __byte_perm_S (w[16], w[17], selector);
|
|
w[30] = __byte_perm_S (w[15], w[16], selector);
|
|
w[29] = __byte_perm_S (w[14], w[15], selector);
|
|
w[28] = __byte_perm_S (w[13], w[14], selector);
|
|
w[27] = __byte_perm_S (w[12], w[13], selector);
|
|
w[26] = __byte_perm_S (w[11], w[12], selector);
|
|
w[25] = __byte_perm_S (w[10], w[11], selector);
|
|
w[24] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[23] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[22] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[21] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[20] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[19] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[18] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[17] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[16] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[15] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[14] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = __byte_perm_S (w[47], w[48], selector);
|
|
w[62] = __byte_perm_S (w[46], w[47], selector);
|
|
w[61] = __byte_perm_S (w[45], w[46], selector);
|
|
w[60] = __byte_perm_S (w[44], w[45], selector);
|
|
w[59] = __byte_perm_S (w[43], w[44], selector);
|
|
w[58] = __byte_perm_S (w[42], w[43], selector);
|
|
w[57] = __byte_perm_S (w[41], w[42], selector);
|
|
w[56] = __byte_perm_S (w[40], w[41], selector);
|
|
w[55] = __byte_perm_S (w[39], w[40], selector);
|
|
w[54] = __byte_perm_S (w[38], w[39], selector);
|
|
w[53] = __byte_perm_S (w[37], w[38], selector);
|
|
w[52] = __byte_perm_S (w[36], w[37], selector);
|
|
w[51] = __byte_perm_S (w[35], w[36], selector);
|
|
w[50] = __byte_perm_S (w[34], w[35], selector);
|
|
w[49] = __byte_perm_S (w[33], w[34], selector);
|
|
w[48] = __byte_perm_S (w[32], w[33], selector);
|
|
w[47] = __byte_perm_S (w[31], w[32], selector);
|
|
w[46] = __byte_perm_S (w[30], w[31], selector);
|
|
w[45] = __byte_perm_S (w[29], w[30], selector);
|
|
w[44] = __byte_perm_S (w[28], w[29], selector);
|
|
w[43] = __byte_perm_S (w[27], w[28], selector);
|
|
w[42] = __byte_perm_S (w[26], w[27], selector);
|
|
w[41] = __byte_perm_S (w[25], w[26], selector);
|
|
w[40] = __byte_perm_S (w[24], w[25], selector);
|
|
w[39] = __byte_perm_S (w[23], w[24], selector);
|
|
w[38] = __byte_perm_S (w[22], w[23], selector);
|
|
w[37] = __byte_perm_S (w[21], w[22], selector);
|
|
w[36] = __byte_perm_S (w[20], w[21], selector);
|
|
w[35] = __byte_perm_S (w[19], w[20], selector);
|
|
w[34] = __byte_perm_S (w[18], w[19], selector);
|
|
w[33] = __byte_perm_S (w[17], w[18], selector);
|
|
w[32] = __byte_perm_S (w[16], w[17], selector);
|
|
w[31] = __byte_perm_S (w[15], w[16], selector);
|
|
w[30] = __byte_perm_S (w[14], w[15], selector);
|
|
w[29] = __byte_perm_S (w[13], w[14], selector);
|
|
w[28] = __byte_perm_S (w[12], w[13], selector);
|
|
w[27] = __byte_perm_S (w[11], w[12], selector);
|
|
w[26] = __byte_perm_S (w[10], w[11], selector);
|
|
w[25] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[24] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[23] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[22] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[21] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[20] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[19] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[18] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[17] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[16] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[15] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = __byte_perm_S (w[46], w[47], selector);
|
|
w[62] = __byte_perm_S (w[45], w[46], selector);
|
|
w[61] = __byte_perm_S (w[44], w[45], selector);
|
|
w[60] = __byte_perm_S (w[43], w[44], selector);
|
|
w[59] = __byte_perm_S (w[42], w[43], selector);
|
|
w[58] = __byte_perm_S (w[41], w[42], selector);
|
|
w[57] = __byte_perm_S (w[40], w[41], selector);
|
|
w[56] = __byte_perm_S (w[39], w[40], selector);
|
|
w[55] = __byte_perm_S (w[38], w[39], selector);
|
|
w[54] = __byte_perm_S (w[37], w[38], selector);
|
|
w[53] = __byte_perm_S (w[36], w[37], selector);
|
|
w[52] = __byte_perm_S (w[35], w[36], selector);
|
|
w[51] = __byte_perm_S (w[34], w[35], selector);
|
|
w[50] = __byte_perm_S (w[33], w[34], selector);
|
|
w[49] = __byte_perm_S (w[32], w[33], selector);
|
|
w[48] = __byte_perm_S (w[31], w[32], selector);
|
|
w[47] = __byte_perm_S (w[30], w[31], selector);
|
|
w[46] = __byte_perm_S (w[29], w[30], selector);
|
|
w[45] = __byte_perm_S (w[28], w[29], selector);
|
|
w[44] = __byte_perm_S (w[27], w[28], selector);
|
|
w[43] = __byte_perm_S (w[26], w[27], selector);
|
|
w[42] = __byte_perm_S (w[25], w[26], selector);
|
|
w[41] = __byte_perm_S (w[24], w[25], selector);
|
|
w[40] = __byte_perm_S (w[23], w[24], selector);
|
|
w[39] = __byte_perm_S (w[22], w[23], selector);
|
|
w[38] = __byte_perm_S (w[21], w[22], selector);
|
|
w[37] = __byte_perm_S (w[20], w[21], selector);
|
|
w[36] = __byte_perm_S (w[19], w[20], selector);
|
|
w[35] = __byte_perm_S (w[18], w[19], selector);
|
|
w[34] = __byte_perm_S (w[17], w[18], selector);
|
|
w[33] = __byte_perm_S (w[16], w[17], selector);
|
|
w[32] = __byte_perm_S (w[15], w[16], selector);
|
|
w[31] = __byte_perm_S (w[14], w[15], selector);
|
|
w[30] = __byte_perm_S (w[13], w[14], selector);
|
|
w[29] = __byte_perm_S (w[12], w[13], selector);
|
|
w[28] = __byte_perm_S (w[11], w[12], selector);
|
|
w[27] = __byte_perm_S (w[10], w[11], selector);
|
|
w[26] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[25] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[24] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[23] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[22] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[21] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[20] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[19] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[18] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[17] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[16] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = __byte_perm_S (w[45], w[46], selector);
|
|
w[62] = __byte_perm_S (w[44], w[45], selector);
|
|
w[61] = __byte_perm_S (w[43], w[44], selector);
|
|
w[60] = __byte_perm_S (w[42], w[43], selector);
|
|
w[59] = __byte_perm_S (w[41], w[42], selector);
|
|
w[58] = __byte_perm_S (w[40], w[41], selector);
|
|
w[57] = __byte_perm_S (w[39], w[40], selector);
|
|
w[56] = __byte_perm_S (w[38], w[39], selector);
|
|
w[55] = __byte_perm_S (w[37], w[38], selector);
|
|
w[54] = __byte_perm_S (w[36], w[37], selector);
|
|
w[53] = __byte_perm_S (w[35], w[36], selector);
|
|
w[52] = __byte_perm_S (w[34], w[35], selector);
|
|
w[51] = __byte_perm_S (w[33], w[34], selector);
|
|
w[50] = __byte_perm_S (w[32], w[33], selector);
|
|
w[49] = __byte_perm_S (w[31], w[32], selector);
|
|
w[48] = __byte_perm_S (w[30], w[31], selector);
|
|
w[47] = __byte_perm_S (w[29], w[30], selector);
|
|
w[46] = __byte_perm_S (w[28], w[29], selector);
|
|
w[45] = __byte_perm_S (w[27], w[28], selector);
|
|
w[44] = __byte_perm_S (w[26], w[27], selector);
|
|
w[43] = __byte_perm_S (w[25], w[26], selector);
|
|
w[42] = __byte_perm_S (w[24], w[25], selector);
|
|
w[41] = __byte_perm_S (w[23], w[24], selector);
|
|
w[40] = __byte_perm_S (w[22], w[23], selector);
|
|
w[39] = __byte_perm_S (w[21], w[22], selector);
|
|
w[38] = __byte_perm_S (w[20], w[21], selector);
|
|
w[37] = __byte_perm_S (w[19], w[20], selector);
|
|
w[36] = __byte_perm_S (w[18], w[19], selector);
|
|
w[35] = __byte_perm_S (w[17], w[18], selector);
|
|
w[34] = __byte_perm_S (w[16], w[17], selector);
|
|
w[33] = __byte_perm_S (w[15], w[16], selector);
|
|
w[32] = __byte_perm_S (w[14], w[15], selector);
|
|
w[31] = __byte_perm_S (w[13], w[14], selector);
|
|
w[30] = __byte_perm_S (w[12], w[13], selector);
|
|
w[29] = __byte_perm_S (w[11], w[12], selector);
|
|
w[28] = __byte_perm_S (w[10], w[11], selector);
|
|
w[27] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[26] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[25] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[24] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[23] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[22] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[21] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[20] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[19] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[18] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[17] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = __byte_perm_S (w[44], w[45], selector);
|
|
w[62] = __byte_perm_S (w[43], w[44], selector);
|
|
w[61] = __byte_perm_S (w[42], w[43], selector);
|
|
w[60] = __byte_perm_S (w[41], w[42], selector);
|
|
w[59] = __byte_perm_S (w[40], w[41], selector);
|
|
w[58] = __byte_perm_S (w[39], w[40], selector);
|
|
w[57] = __byte_perm_S (w[38], w[39], selector);
|
|
w[56] = __byte_perm_S (w[37], w[38], selector);
|
|
w[55] = __byte_perm_S (w[36], w[37], selector);
|
|
w[54] = __byte_perm_S (w[35], w[36], selector);
|
|
w[53] = __byte_perm_S (w[34], w[35], selector);
|
|
w[52] = __byte_perm_S (w[33], w[34], selector);
|
|
w[51] = __byte_perm_S (w[32], w[33], selector);
|
|
w[50] = __byte_perm_S (w[31], w[32], selector);
|
|
w[49] = __byte_perm_S (w[30], w[31], selector);
|
|
w[48] = __byte_perm_S (w[29], w[30], selector);
|
|
w[47] = __byte_perm_S (w[28], w[29], selector);
|
|
w[46] = __byte_perm_S (w[27], w[28], selector);
|
|
w[45] = __byte_perm_S (w[26], w[27], selector);
|
|
w[44] = __byte_perm_S (w[25], w[26], selector);
|
|
w[43] = __byte_perm_S (w[24], w[25], selector);
|
|
w[42] = __byte_perm_S (w[23], w[24], selector);
|
|
w[41] = __byte_perm_S (w[22], w[23], selector);
|
|
w[40] = __byte_perm_S (w[21], w[22], selector);
|
|
w[39] = __byte_perm_S (w[20], w[21], selector);
|
|
w[38] = __byte_perm_S (w[19], w[20], selector);
|
|
w[37] = __byte_perm_S (w[18], w[19], selector);
|
|
w[36] = __byte_perm_S (w[17], w[18], selector);
|
|
w[35] = __byte_perm_S (w[16], w[17], selector);
|
|
w[34] = __byte_perm_S (w[15], w[16], selector);
|
|
w[33] = __byte_perm_S (w[14], w[15], selector);
|
|
w[32] = __byte_perm_S (w[13], w[14], selector);
|
|
w[31] = __byte_perm_S (w[12], w[13], selector);
|
|
w[30] = __byte_perm_S (w[11], w[12], selector);
|
|
w[29] = __byte_perm_S (w[10], w[11], selector);
|
|
w[28] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[27] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[26] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[25] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[24] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[23] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[22] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[21] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[20] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[19] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[18] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = __byte_perm_S (w[43], w[44], selector);
|
|
w[62] = __byte_perm_S (w[42], w[43], selector);
|
|
w[61] = __byte_perm_S (w[41], w[42], selector);
|
|
w[60] = __byte_perm_S (w[40], w[41], selector);
|
|
w[59] = __byte_perm_S (w[39], w[40], selector);
|
|
w[58] = __byte_perm_S (w[38], w[39], selector);
|
|
w[57] = __byte_perm_S (w[37], w[38], selector);
|
|
w[56] = __byte_perm_S (w[36], w[37], selector);
|
|
w[55] = __byte_perm_S (w[35], w[36], selector);
|
|
w[54] = __byte_perm_S (w[34], w[35], selector);
|
|
w[53] = __byte_perm_S (w[33], w[34], selector);
|
|
w[52] = __byte_perm_S (w[32], w[33], selector);
|
|
w[51] = __byte_perm_S (w[31], w[32], selector);
|
|
w[50] = __byte_perm_S (w[30], w[31], selector);
|
|
w[49] = __byte_perm_S (w[29], w[30], selector);
|
|
w[48] = __byte_perm_S (w[28], w[29], selector);
|
|
w[47] = __byte_perm_S (w[27], w[28], selector);
|
|
w[46] = __byte_perm_S (w[26], w[27], selector);
|
|
w[45] = __byte_perm_S (w[25], w[26], selector);
|
|
w[44] = __byte_perm_S (w[24], w[25], selector);
|
|
w[43] = __byte_perm_S (w[23], w[24], selector);
|
|
w[42] = __byte_perm_S (w[22], w[23], selector);
|
|
w[41] = __byte_perm_S (w[21], w[22], selector);
|
|
w[40] = __byte_perm_S (w[20], w[21], selector);
|
|
w[39] = __byte_perm_S (w[19], w[20], selector);
|
|
w[38] = __byte_perm_S (w[18], w[19], selector);
|
|
w[37] = __byte_perm_S (w[17], w[18], selector);
|
|
w[36] = __byte_perm_S (w[16], w[17], selector);
|
|
w[35] = __byte_perm_S (w[15], w[16], selector);
|
|
w[34] = __byte_perm_S (w[14], w[15], selector);
|
|
w[33] = __byte_perm_S (w[13], w[14], selector);
|
|
w[32] = __byte_perm_S (w[12], w[13], selector);
|
|
w[31] = __byte_perm_S (w[11], w[12], selector);
|
|
w[30] = __byte_perm_S (w[10], w[11], selector);
|
|
w[29] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[28] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[27] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[26] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[25] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[24] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[23] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[22] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[21] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[20] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[19] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = __byte_perm_S (w[42], w[43], selector);
|
|
w[62] = __byte_perm_S (w[41], w[42], selector);
|
|
w[61] = __byte_perm_S (w[40], w[41], selector);
|
|
w[60] = __byte_perm_S (w[39], w[40], selector);
|
|
w[59] = __byte_perm_S (w[38], w[39], selector);
|
|
w[58] = __byte_perm_S (w[37], w[38], selector);
|
|
w[57] = __byte_perm_S (w[36], w[37], selector);
|
|
w[56] = __byte_perm_S (w[35], w[36], selector);
|
|
w[55] = __byte_perm_S (w[34], w[35], selector);
|
|
w[54] = __byte_perm_S (w[33], w[34], selector);
|
|
w[53] = __byte_perm_S (w[32], w[33], selector);
|
|
w[52] = __byte_perm_S (w[31], w[32], selector);
|
|
w[51] = __byte_perm_S (w[30], w[31], selector);
|
|
w[50] = __byte_perm_S (w[29], w[30], selector);
|
|
w[49] = __byte_perm_S (w[28], w[29], selector);
|
|
w[48] = __byte_perm_S (w[27], w[28], selector);
|
|
w[47] = __byte_perm_S (w[26], w[27], selector);
|
|
w[46] = __byte_perm_S (w[25], w[26], selector);
|
|
w[45] = __byte_perm_S (w[24], w[25], selector);
|
|
w[44] = __byte_perm_S (w[23], w[24], selector);
|
|
w[43] = __byte_perm_S (w[22], w[23], selector);
|
|
w[42] = __byte_perm_S (w[21], w[22], selector);
|
|
w[41] = __byte_perm_S (w[20], w[21], selector);
|
|
w[40] = __byte_perm_S (w[19], w[20], selector);
|
|
w[39] = __byte_perm_S (w[18], w[19], selector);
|
|
w[38] = __byte_perm_S (w[17], w[18], selector);
|
|
w[37] = __byte_perm_S (w[16], w[17], selector);
|
|
w[36] = __byte_perm_S (w[15], w[16], selector);
|
|
w[35] = __byte_perm_S (w[14], w[15], selector);
|
|
w[34] = __byte_perm_S (w[13], w[14], selector);
|
|
w[33] = __byte_perm_S (w[12], w[13], selector);
|
|
w[32] = __byte_perm_S (w[11], w[12], selector);
|
|
w[31] = __byte_perm_S (w[10], w[11], selector);
|
|
w[30] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[29] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[28] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[27] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[26] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[25] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[24] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[23] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[22] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[21] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[20] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = __byte_perm_S (w[41], w[42], selector);
|
|
w[62] = __byte_perm_S (w[40], w[41], selector);
|
|
w[61] = __byte_perm_S (w[39], w[40], selector);
|
|
w[60] = __byte_perm_S (w[38], w[39], selector);
|
|
w[59] = __byte_perm_S (w[37], w[38], selector);
|
|
w[58] = __byte_perm_S (w[36], w[37], selector);
|
|
w[57] = __byte_perm_S (w[35], w[36], selector);
|
|
w[56] = __byte_perm_S (w[34], w[35], selector);
|
|
w[55] = __byte_perm_S (w[33], w[34], selector);
|
|
w[54] = __byte_perm_S (w[32], w[33], selector);
|
|
w[53] = __byte_perm_S (w[31], w[32], selector);
|
|
w[52] = __byte_perm_S (w[30], w[31], selector);
|
|
w[51] = __byte_perm_S (w[29], w[30], selector);
|
|
w[50] = __byte_perm_S (w[28], w[29], selector);
|
|
w[49] = __byte_perm_S (w[27], w[28], selector);
|
|
w[48] = __byte_perm_S (w[26], w[27], selector);
|
|
w[47] = __byte_perm_S (w[25], w[26], selector);
|
|
w[46] = __byte_perm_S (w[24], w[25], selector);
|
|
w[45] = __byte_perm_S (w[23], w[24], selector);
|
|
w[44] = __byte_perm_S (w[22], w[23], selector);
|
|
w[43] = __byte_perm_S (w[21], w[22], selector);
|
|
w[42] = __byte_perm_S (w[20], w[21], selector);
|
|
w[41] = __byte_perm_S (w[19], w[20], selector);
|
|
w[40] = __byte_perm_S (w[18], w[19], selector);
|
|
w[39] = __byte_perm_S (w[17], w[18], selector);
|
|
w[38] = __byte_perm_S (w[16], w[17], selector);
|
|
w[37] = __byte_perm_S (w[15], w[16], selector);
|
|
w[36] = __byte_perm_S (w[14], w[15], selector);
|
|
w[35] = __byte_perm_S (w[13], w[14], selector);
|
|
w[34] = __byte_perm_S (w[12], w[13], selector);
|
|
w[33] = __byte_perm_S (w[11], w[12], selector);
|
|
w[32] = __byte_perm_S (w[10], w[11], selector);
|
|
w[31] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[30] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[29] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[28] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[27] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[26] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[25] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[24] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[23] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[22] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[21] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = __byte_perm_S (w[40], w[41], selector);
|
|
w[62] = __byte_perm_S (w[39], w[40], selector);
|
|
w[61] = __byte_perm_S (w[38], w[39], selector);
|
|
w[60] = __byte_perm_S (w[37], w[38], selector);
|
|
w[59] = __byte_perm_S (w[36], w[37], selector);
|
|
w[58] = __byte_perm_S (w[35], w[36], selector);
|
|
w[57] = __byte_perm_S (w[34], w[35], selector);
|
|
w[56] = __byte_perm_S (w[33], w[34], selector);
|
|
w[55] = __byte_perm_S (w[32], w[33], selector);
|
|
w[54] = __byte_perm_S (w[31], w[32], selector);
|
|
w[53] = __byte_perm_S (w[30], w[31], selector);
|
|
w[52] = __byte_perm_S (w[29], w[30], selector);
|
|
w[51] = __byte_perm_S (w[28], w[29], selector);
|
|
w[50] = __byte_perm_S (w[27], w[28], selector);
|
|
w[49] = __byte_perm_S (w[26], w[27], selector);
|
|
w[48] = __byte_perm_S (w[25], w[26], selector);
|
|
w[47] = __byte_perm_S (w[24], w[25], selector);
|
|
w[46] = __byte_perm_S (w[23], w[24], selector);
|
|
w[45] = __byte_perm_S (w[22], w[23], selector);
|
|
w[44] = __byte_perm_S (w[21], w[22], selector);
|
|
w[43] = __byte_perm_S (w[20], w[21], selector);
|
|
w[42] = __byte_perm_S (w[19], w[20], selector);
|
|
w[41] = __byte_perm_S (w[18], w[19], selector);
|
|
w[40] = __byte_perm_S (w[17], w[18], selector);
|
|
w[39] = __byte_perm_S (w[16], w[17], selector);
|
|
w[38] = __byte_perm_S (w[15], w[16], selector);
|
|
w[37] = __byte_perm_S (w[14], w[15], selector);
|
|
w[36] = __byte_perm_S (w[13], w[14], selector);
|
|
w[35] = __byte_perm_S (w[12], w[13], selector);
|
|
w[34] = __byte_perm_S (w[11], w[12], selector);
|
|
w[33] = __byte_perm_S (w[10], w[11], selector);
|
|
w[32] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[31] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[30] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[29] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[28] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[27] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[26] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[25] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[24] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[23] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[22] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = __byte_perm_S (w[39], w[40], selector);
|
|
w[62] = __byte_perm_S (w[38], w[39], selector);
|
|
w[61] = __byte_perm_S (w[37], w[38], selector);
|
|
w[60] = __byte_perm_S (w[36], w[37], selector);
|
|
w[59] = __byte_perm_S (w[35], w[36], selector);
|
|
w[58] = __byte_perm_S (w[34], w[35], selector);
|
|
w[57] = __byte_perm_S (w[33], w[34], selector);
|
|
w[56] = __byte_perm_S (w[32], w[33], selector);
|
|
w[55] = __byte_perm_S (w[31], w[32], selector);
|
|
w[54] = __byte_perm_S (w[30], w[31], selector);
|
|
w[53] = __byte_perm_S (w[29], w[30], selector);
|
|
w[52] = __byte_perm_S (w[28], w[29], selector);
|
|
w[51] = __byte_perm_S (w[27], w[28], selector);
|
|
w[50] = __byte_perm_S (w[26], w[27], selector);
|
|
w[49] = __byte_perm_S (w[25], w[26], selector);
|
|
w[48] = __byte_perm_S (w[24], w[25], selector);
|
|
w[47] = __byte_perm_S (w[23], w[24], selector);
|
|
w[46] = __byte_perm_S (w[22], w[23], selector);
|
|
w[45] = __byte_perm_S (w[21], w[22], selector);
|
|
w[44] = __byte_perm_S (w[20], w[21], selector);
|
|
w[43] = __byte_perm_S (w[19], w[20], selector);
|
|
w[42] = __byte_perm_S (w[18], w[19], selector);
|
|
w[41] = __byte_perm_S (w[17], w[18], selector);
|
|
w[40] = __byte_perm_S (w[16], w[17], selector);
|
|
w[39] = __byte_perm_S (w[15], w[16], selector);
|
|
w[38] = __byte_perm_S (w[14], w[15], selector);
|
|
w[37] = __byte_perm_S (w[13], w[14], selector);
|
|
w[36] = __byte_perm_S (w[12], w[13], selector);
|
|
w[35] = __byte_perm_S (w[11], w[12], selector);
|
|
w[34] = __byte_perm_S (w[10], w[11], selector);
|
|
w[33] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[32] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[31] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[30] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[29] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[28] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[27] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[26] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[25] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[24] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[23] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = __byte_perm_S (w[38], w[39], selector);
|
|
w[62] = __byte_perm_S (w[37], w[38], selector);
|
|
w[61] = __byte_perm_S (w[36], w[37], selector);
|
|
w[60] = __byte_perm_S (w[35], w[36], selector);
|
|
w[59] = __byte_perm_S (w[34], w[35], selector);
|
|
w[58] = __byte_perm_S (w[33], w[34], selector);
|
|
w[57] = __byte_perm_S (w[32], w[33], selector);
|
|
w[56] = __byte_perm_S (w[31], w[32], selector);
|
|
w[55] = __byte_perm_S (w[30], w[31], selector);
|
|
w[54] = __byte_perm_S (w[29], w[30], selector);
|
|
w[53] = __byte_perm_S (w[28], w[29], selector);
|
|
w[52] = __byte_perm_S (w[27], w[28], selector);
|
|
w[51] = __byte_perm_S (w[26], w[27], selector);
|
|
w[50] = __byte_perm_S (w[25], w[26], selector);
|
|
w[49] = __byte_perm_S (w[24], w[25], selector);
|
|
w[48] = __byte_perm_S (w[23], w[24], selector);
|
|
w[47] = __byte_perm_S (w[22], w[23], selector);
|
|
w[46] = __byte_perm_S (w[21], w[22], selector);
|
|
w[45] = __byte_perm_S (w[20], w[21], selector);
|
|
w[44] = __byte_perm_S (w[19], w[20], selector);
|
|
w[43] = __byte_perm_S (w[18], w[19], selector);
|
|
w[42] = __byte_perm_S (w[17], w[18], selector);
|
|
w[41] = __byte_perm_S (w[16], w[17], selector);
|
|
w[40] = __byte_perm_S (w[15], w[16], selector);
|
|
w[39] = __byte_perm_S (w[14], w[15], selector);
|
|
w[38] = __byte_perm_S (w[13], w[14], selector);
|
|
w[37] = __byte_perm_S (w[12], w[13], selector);
|
|
w[36] = __byte_perm_S (w[11], w[12], selector);
|
|
w[35] = __byte_perm_S (w[10], w[11], selector);
|
|
w[34] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[33] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[32] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[31] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[30] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[29] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[28] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[27] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[26] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[25] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[24] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = __byte_perm_S (w[37], w[38], selector);
|
|
w[62] = __byte_perm_S (w[36], w[37], selector);
|
|
w[61] = __byte_perm_S (w[35], w[36], selector);
|
|
w[60] = __byte_perm_S (w[34], w[35], selector);
|
|
w[59] = __byte_perm_S (w[33], w[34], selector);
|
|
w[58] = __byte_perm_S (w[32], w[33], selector);
|
|
w[57] = __byte_perm_S (w[31], w[32], selector);
|
|
w[56] = __byte_perm_S (w[30], w[31], selector);
|
|
w[55] = __byte_perm_S (w[29], w[30], selector);
|
|
w[54] = __byte_perm_S (w[28], w[29], selector);
|
|
w[53] = __byte_perm_S (w[27], w[28], selector);
|
|
w[52] = __byte_perm_S (w[26], w[27], selector);
|
|
w[51] = __byte_perm_S (w[25], w[26], selector);
|
|
w[50] = __byte_perm_S (w[24], w[25], selector);
|
|
w[49] = __byte_perm_S (w[23], w[24], selector);
|
|
w[48] = __byte_perm_S (w[22], w[23], selector);
|
|
w[47] = __byte_perm_S (w[21], w[22], selector);
|
|
w[46] = __byte_perm_S (w[20], w[21], selector);
|
|
w[45] = __byte_perm_S (w[19], w[20], selector);
|
|
w[44] = __byte_perm_S (w[18], w[19], selector);
|
|
w[43] = __byte_perm_S (w[17], w[18], selector);
|
|
w[42] = __byte_perm_S (w[16], w[17], selector);
|
|
w[41] = __byte_perm_S (w[15], w[16], selector);
|
|
w[40] = __byte_perm_S (w[14], w[15], selector);
|
|
w[39] = __byte_perm_S (w[13], w[14], selector);
|
|
w[38] = __byte_perm_S (w[12], w[13], selector);
|
|
w[37] = __byte_perm_S (w[11], w[12], selector);
|
|
w[36] = __byte_perm_S (w[10], w[11], selector);
|
|
w[35] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[34] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[33] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[32] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[31] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[30] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[29] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[28] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[27] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[26] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[25] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = __byte_perm_S (w[36], w[37], selector);
|
|
w[62] = __byte_perm_S (w[35], w[36], selector);
|
|
w[61] = __byte_perm_S (w[34], w[35], selector);
|
|
w[60] = __byte_perm_S (w[33], w[34], selector);
|
|
w[59] = __byte_perm_S (w[32], w[33], selector);
|
|
w[58] = __byte_perm_S (w[31], w[32], selector);
|
|
w[57] = __byte_perm_S (w[30], w[31], selector);
|
|
w[56] = __byte_perm_S (w[29], w[30], selector);
|
|
w[55] = __byte_perm_S (w[28], w[29], selector);
|
|
w[54] = __byte_perm_S (w[27], w[28], selector);
|
|
w[53] = __byte_perm_S (w[26], w[27], selector);
|
|
w[52] = __byte_perm_S (w[25], w[26], selector);
|
|
w[51] = __byte_perm_S (w[24], w[25], selector);
|
|
w[50] = __byte_perm_S (w[23], w[24], selector);
|
|
w[49] = __byte_perm_S (w[22], w[23], selector);
|
|
w[48] = __byte_perm_S (w[21], w[22], selector);
|
|
w[47] = __byte_perm_S (w[20], w[21], selector);
|
|
w[46] = __byte_perm_S (w[19], w[20], selector);
|
|
w[45] = __byte_perm_S (w[18], w[19], selector);
|
|
w[44] = __byte_perm_S (w[17], w[18], selector);
|
|
w[43] = __byte_perm_S (w[16], w[17], selector);
|
|
w[42] = __byte_perm_S (w[15], w[16], selector);
|
|
w[41] = __byte_perm_S (w[14], w[15], selector);
|
|
w[40] = __byte_perm_S (w[13], w[14], selector);
|
|
w[39] = __byte_perm_S (w[12], w[13], selector);
|
|
w[38] = __byte_perm_S (w[11], w[12], selector);
|
|
w[37] = __byte_perm_S (w[10], w[11], selector);
|
|
w[36] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[35] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[34] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[33] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[32] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[31] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[30] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[29] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[28] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[27] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[26] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = __byte_perm_S (w[35], w[36], selector);
|
|
w[62] = __byte_perm_S (w[34], w[35], selector);
|
|
w[61] = __byte_perm_S (w[33], w[34], selector);
|
|
w[60] = __byte_perm_S (w[32], w[33], selector);
|
|
w[59] = __byte_perm_S (w[31], w[32], selector);
|
|
w[58] = __byte_perm_S (w[30], w[31], selector);
|
|
w[57] = __byte_perm_S (w[29], w[30], selector);
|
|
w[56] = __byte_perm_S (w[28], w[29], selector);
|
|
w[55] = __byte_perm_S (w[27], w[28], selector);
|
|
w[54] = __byte_perm_S (w[26], w[27], selector);
|
|
w[53] = __byte_perm_S (w[25], w[26], selector);
|
|
w[52] = __byte_perm_S (w[24], w[25], selector);
|
|
w[51] = __byte_perm_S (w[23], w[24], selector);
|
|
w[50] = __byte_perm_S (w[22], w[23], selector);
|
|
w[49] = __byte_perm_S (w[21], w[22], selector);
|
|
w[48] = __byte_perm_S (w[20], w[21], selector);
|
|
w[47] = __byte_perm_S (w[19], w[20], selector);
|
|
w[46] = __byte_perm_S (w[18], w[19], selector);
|
|
w[45] = __byte_perm_S (w[17], w[18], selector);
|
|
w[44] = __byte_perm_S (w[16], w[17], selector);
|
|
w[43] = __byte_perm_S (w[15], w[16], selector);
|
|
w[42] = __byte_perm_S (w[14], w[15], selector);
|
|
w[41] = __byte_perm_S (w[13], w[14], selector);
|
|
w[40] = __byte_perm_S (w[12], w[13], selector);
|
|
w[39] = __byte_perm_S (w[11], w[12], selector);
|
|
w[38] = __byte_perm_S (w[10], w[11], selector);
|
|
w[37] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[36] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[35] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[34] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[33] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[32] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[31] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[30] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[29] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[28] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[27] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = __byte_perm_S (w[34], w[35], selector);
|
|
w[62] = __byte_perm_S (w[33], w[34], selector);
|
|
w[61] = __byte_perm_S (w[32], w[33], selector);
|
|
w[60] = __byte_perm_S (w[31], w[32], selector);
|
|
w[59] = __byte_perm_S (w[30], w[31], selector);
|
|
w[58] = __byte_perm_S (w[29], w[30], selector);
|
|
w[57] = __byte_perm_S (w[28], w[29], selector);
|
|
w[56] = __byte_perm_S (w[27], w[28], selector);
|
|
w[55] = __byte_perm_S (w[26], w[27], selector);
|
|
w[54] = __byte_perm_S (w[25], w[26], selector);
|
|
w[53] = __byte_perm_S (w[24], w[25], selector);
|
|
w[52] = __byte_perm_S (w[23], w[24], selector);
|
|
w[51] = __byte_perm_S (w[22], w[23], selector);
|
|
w[50] = __byte_perm_S (w[21], w[22], selector);
|
|
w[49] = __byte_perm_S (w[20], w[21], selector);
|
|
w[48] = __byte_perm_S (w[19], w[20], selector);
|
|
w[47] = __byte_perm_S (w[18], w[19], selector);
|
|
w[46] = __byte_perm_S (w[17], w[18], selector);
|
|
w[45] = __byte_perm_S (w[16], w[17], selector);
|
|
w[44] = __byte_perm_S (w[15], w[16], selector);
|
|
w[43] = __byte_perm_S (w[14], w[15], selector);
|
|
w[42] = __byte_perm_S (w[13], w[14], selector);
|
|
w[41] = __byte_perm_S (w[12], w[13], selector);
|
|
w[40] = __byte_perm_S (w[11], w[12], selector);
|
|
w[39] = __byte_perm_S (w[10], w[11], selector);
|
|
w[38] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[37] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[36] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[35] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[34] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[33] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[32] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[31] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[30] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[29] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[28] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = __byte_perm_S (w[33], w[34], selector);
|
|
w[62] = __byte_perm_S (w[32], w[33], selector);
|
|
w[61] = __byte_perm_S (w[31], w[32], selector);
|
|
w[60] = __byte_perm_S (w[30], w[31], selector);
|
|
w[59] = __byte_perm_S (w[29], w[30], selector);
|
|
w[58] = __byte_perm_S (w[28], w[29], selector);
|
|
w[57] = __byte_perm_S (w[27], w[28], selector);
|
|
w[56] = __byte_perm_S (w[26], w[27], selector);
|
|
w[55] = __byte_perm_S (w[25], w[26], selector);
|
|
w[54] = __byte_perm_S (w[24], w[25], selector);
|
|
w[53] = __byte_perm_S (w[23], w[24], selector);
|
|
w[52] = __byte_perm_S (w[22], w[23], selector);
|
|
w[51] = __byte_perm_S (w[21], w[22], selector);
|
|
w[50] = __byte_perm_S (w[20], w[21], selector);
|
|
w[49] = __byte_perm_S (w[19], w[20], selector);
|
|
w[48] = __byte_perm_S (w[18], w[19], selector);
|
|
w[47] = __byte_perm_S (w[17], w[18], selector);
|
|
w[46] = __byte_perm_S (w[16], w[17], selector);
|
|
w[45] = __byte_perm_S (w[15], w[16], selector);
|
|
w[44] = __byte_perm_S (w[14], w[15], selector);
|
|
w[43] = __byte_perm_S (w[13], w[14], selector);
|
|
w[42] = __byte_perm_S (w[12], w[13], selector);
|
|
w[41] = __byte_perm_S (w[11], w[12], selector);
|
|
w[40] = __byte_perm_S (w[10], w[11], selector);
|
|
w[39] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[38] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[37] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[36] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[35] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[34] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[33] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[32] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[31] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[30] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[29] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = __byte_perm_S (w[32], w[33], selector);
|
|
w[62] = __byte_perm_S (w[31], w[32], selector);
|
|
w[61] = __byte_perm_S (w[30], w[31], selector);
|
|
w[60] = __byte_perm_S (w[29], w[30], selector);
|
|
w[59] = __byte_perm_S (w[28], w[29], selector);
|
|
w[58] = __byte_perm_S (w[27], w[28], selector);
|
|
w[57] = __byte_perm_S (w[26], w[27], selector);
|
|
w[56] = __byte_perm_S (w[25], w[26], selector);
|
|
w[55] = __byte_perm_S (w[24], w[25], selector);
|
|
w[54] = __byte_perm_S (w[23], w[24], selector);
|
|
w[53] = __byte_perm_S (w[22], w[23], selector);
|
|
w[52] = __byte_perm_S (w[21], w[22], selector);
|
|
w[51] = __byte_perm_S (w[20], w[21], selector);
|
|
w[50] = __byte_perm_S (w[19], w[20], selector);
|
|
w[49] = __byte_perm_S (w[18], w[19], selector);
|
|
w[48] = __byte_perm_S (w[17], w[18], selector);
|
|
w[47] = __byte_perm_S (w[16], w[17], selector);
|
|
w[46] = __byte_perm_S (w[15], w[16], selector);
|
|
w[45] = __byte_perm_S (w[14], w[15], selector);
|
|
w[44] = __byte_perm_S (w[13], w[14], selector);
|
|
w[43] = __byte_perm_S (w[12], w[13], selector);
|
|
w[42] = __byte_perm_S (w[11], w[12], selector);
|
|
w[41] = __byte_perm_S (w[10], w[11], selector);
|
|
w[40] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[39] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[38] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[37] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[36] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[35] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[34] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[33] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[32] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[31] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[30] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = __byte_perm_S (w[31], w[32], selector);
|
|
w[62] = __byte_perm_S (w[30], w[31], selector);
|
|
w[61] = __byte_perm_S (w[29], w[30], selector);
|
|
w[60] = __byte_perm_S (w[28], w[29], selector);
|
|
w[59] = __byte_perm_S (w[27], w[28], selector);
|
|
w[58] = __byte_perm_S (w[26], w[27], selector);
|
|
w[57] = __byte_perm_S (w[25], w[26], selector);
|
|
w[56] = __byte_perm_S (w[24], w[25], selector);
|
|
w[55] = __byte_perm_S (w[23], w[24], selector);
|
|
w[54] = __byte_perm_S (w[22], w[23], selector);
|
|
w[53] = __byte_perm_S (w[21], w[22], selector);
|
|
w[52] = __byte_perm_S (w[20], w[21], selector);
|
|
w[51] = __byte_perm_S (w[19], w[20], selector);
|
|
w[50] = __byte_perm_S (w[18], w[19], selector);
|
|
w[49] = __byte_perm_S (w[17], w[18], selector);
|
|
w[48] = __byte_perm_S (w[16], w[17], selector);
|
|
w[47] = __byte_perm_S (w[15], w[16], selector);
|
|
w[46] = __byte_perm_S (w[14], w[15], selector);
|
|
w[45] = __byte_perm_S (w[13], w[14], selector);
|
|
w[44] = __byte_perm_S (w[12], w[13], selector);
|
|
w[43] = __byte_perm_S (w[11], w[12], selector);
|
|
w[42] = __byte_perm_S (w[10], w[11], selector);
|
|
w[41] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[40] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[39] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[38] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[37] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[36] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[35] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[34] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[33] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[32] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[31] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = __byte_perm_S (w[30], w[31], selector);
|
|
w[62] = __byte_perm_S (w[29], w[30], selector);
|
|
w[61] = __byte_perm_S (w[28], w[29], selector);
|
|
w[60] = __byte_perm_S (w[27], w[28], selector);
|
|
w[59] = __byte_perm_S (w[26], w[27], selector);
|
|
w[58] = __byte_perm_S (w[25], w[26], selector);
|
|
w[57] = __byte_perm_S (w[24], w[25], selector);
|
|
w[56] = __byte_perm_S (w[23], w[24], selector);
|
|
w[55] = __byte_perm_S (w[22], w[23], selector);
|
|
w[54] = __byte_perm_S (w[21], w[22], selector);
|
|
w[53] = __byte_perm_S (w[20], w[21], selector);
|
|
w[52] = __byte_perm_S (w[19], w[20], selector);
|
|
w[51] = __byte_perm_S (w[18], w[19], selector);
|
|
w[50] = __byte_perm_S (w[17], w[18], selector);
|
|
w[49] = __byte_perm_S (w[16], w[17], selector);
|
|
w[48] = __byte_perm_S (w[15], w[16], selector);
|
|
w[47] = __byte_perm_S (w[14], w[15], selector);
|
|
w[46] = __byte_perm_S (w[13], w[14], selector);
|
|
w[45] = __byte_perm_S (w[12], w[13], selector);
|
|
w[44] = __byte_perm_S (w[11], w[12], selector);
|
|
w[43] = __byte_perm_S (w[10], w[11], selector);
|
|
w[42] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[41] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[40] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[39] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[38] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[37] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[36] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[35] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[34] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[33] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[32] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = __byte_perm_S (w[29], w[30], selector);
|
|
w[62] = __byte_perm_S (w[28], w[29], selector);
|
|
w[61] = __byte_perm_S (w[27], w[28], selector);
|
|
w[60] = __byte_perm_S (w[26], w[27], selector);
|
|
w[59] = __byte_perm_S (w[25], w[26], selector);
|
|
w[58] = __byte_perm_S (w[24], w[25], selector);
|
|
w[57] = __byte_perm_S (w[23], w[24], selector);
|
|
w[56] = __byte_perm_S (w[22], w[23], selector);
|
|
w[55] = __byte_perm_S (w[21], w[22], selector);
|
|
w[54] = __byte_perm_S (w[20], w[21], selector);
|
|
w[53] = __byte_perm_S (w[19], w[20], selector);
|
|
w[52] = __byte_perm_S (w[18], w[19], selector);
|
|
w[51] = __byte_perm_S (w[17], w[18], selector);
|
|
w[50] = __byte_perm_S (w[16], w[17], selector);
|
|
w[49] = __byte_perm_S (w[15], w[16], selector);
|
|
w[48] = __byte_perm_S (w[14], w[15], selector);
|
|
w[47] = __byte_perm_S (w[13], w[14], selector);
|
|
w[46] = __byte_perm_S (w[12], w[13], selector);
|
|
w[45] = __byte_perm_S (w[11], w[12], selector);
|
|
w[44] = __byte_perm_S (w[10], w[11], selector);
|
|
w[43] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[42] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[41] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[40] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[39] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[38] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[37] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[36] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[35] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[34] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[33] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = __byte_perm_S (w[28], w[29], selector);
|
|
w[62] = __byte_perm_S (w[27], w[28], selector);
|
|
w[61] = __byte_perm_S (w[26], w[27], selector);
|
|
w[60] = __byte_perm_S (w[25], w[26], selector);
|
|
w[59] = __byte_perm_S (w[24], w[25], selector);
|
|
w[58] = __byte_perm_S (w[23], w[24], selector);
|
|
w[57] = __byte_perm_S (w[22], w[23], selector);
|
|
w[56] = __byte_perm_S (w[21], w[22], selector);
|
|
w[55] = __byte_perm_S (w[20], w[21], selector);
|
|
w[54] = __byte_perm_S (w[19], w[20], selector);
|
|
w[53] = __byte_perm_S (w[18], w[19], selector);
|
|
w[52] = __byte_perm_S (w[17], w[18], selector);
|
|
w[51] = __byte_perm_S (w[16], w[17], selector);
|
|
w[50] = __byte_perm_S (w[15], w[16], selector);
|
|
w[49] = __byte_perm_S (w[14], w[15], selector);
|
|
w[48] = __byte_perm_S (w[13], w[14], selector);
|
|
w[47] = __byte_perm_S (w[12], w[13], selector);
|
|
w[46] = __byte_perm_S (w[11], w[12], selector);
|
|
w[45] = __byte_perm_S (w[10], w[11], selector);
|
|
w[44] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[43] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[42] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[41] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[40] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[39] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[38] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[37] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[36] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[35] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[34] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = __byte_perm_S (w[27], w[28], selector);
|
|
w[62] = __byte_perm_S (w[26], w[27], selector);
|
|
w[61] = __byte_perm_S (w[25], w[26], selector);
|
|
w[60] = __byte_perm_S (w[24], w[25], selector);
|
|
w[59] = __byte_perm_S (w[23], w[24], selector);
|
|
w[58] = __byte_perm_S (w[22], w[23], selector);
|
|
w[57] = __byte_perm_S (w[21], w[22], selector);
|
|
w[56] = __byte_perm_S (w[20], w[21], selector);
|
|
w[55] = __byte_perm_S (w[19], w[20], selector);
|
|
w[54] = __byte_perm_S (w[18], w[19], selector);
|
|
w[53] = __byte_perm_S (w[17], w[18], selector);
|
|
w[52] = __byte_perm_S (w[16], w[17], selector);
|
|
w[51] = __byte_perm_S (w[15], w[16], selector);
|
|
w[50] = __byte_perm_S (w[14], w[15], selector);
|
|
w[49] = __byte_perm_S (w[13], w[14], selector);
|
|
w[48] = __byte_perm_S (w[12], w[13], selector);
|
|
w[47] = __byte_perm_S (w[11], w[12], selector);
|
|
w[46] = __byte_perm_S (w[10], w[11], selector);
|
|
w[45] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[44] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[43] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[42] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[41] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[40] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[39] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[38] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[37] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[36] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[35] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = __byte_perm_S (w[26], w[27], selector);
|
|
w[62] = __byte_perm_S (w[25], w[26], selector);
|
|
w[61] = __byte_perm_S (w[24], w[25], selector);
|
|
w[60] = __byte_perm_S (w[23], w[24], selector);
|
|
w[59] = __byte_perm_S (w[22], w[23], selector);
|
|
w[58] = __byte_perm_S (w[21], w[22], selector);
|
|
w[57] = __byte_perm_S (w[20], w[21], selector);
|
|
w[56] = __byte_perm_S (w[19], w[20], selector);
|
|
w[55] = __byte_perm_S (w[18], w[19], selector);
|
|
w[54] = __byte_perm_S (w[17], w[18], selector);
|
|
w[53] = __byte_perm_S (w[16], w[17], selector);
|
|
w[52] = __byte_perm_S (w[15], w[16], selector);
|
|
w[51] = __byte_perm_S (w[14], w[15], selector);
|
|
w[50] = __byte_perm_S (w[13], w[14], selector);
|
|
w[49] = __byte_perm_S (w[12], w[13], selector);
|
|
w[48] = __byte_perm_S (w[11], w[12], selector);
|
|
w[47] = __byte_perm_S (w[10], w[11], selector);
|
|
w[46] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[45] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[44] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[43] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[42] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[41] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[40] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[39] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[38] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[37] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[36] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = __byte_perm_S (w[25], w[26], selector);
|
|
w[62] = __byte_perm_S (w[24], w[25], selector);
|
|
w[61] = __byte_perm_S (w[23], w[24], selector);
|
|
w[60] = __byte_perm_S (w[22], w[23], selector);
|
|
w[59] = __byte_perm_S (w[21], w[22], selector);
|
|
w[58] = __byte_perm_S (w[20], w[21], selector);
|
|
w[57] = __byte_perm_S (w[19], w[20], selector);
|
|
w[56] = __byte_perm_S (w[18], w[19], selector);
|
|
w[55] = __byte_perm_S (w[17], w[18], selector);
|
|
w[54] = __byte_perm_S (w[16], w[17], selector);
|
|
w[53] = __byte_perm_S (w[15], w[16], selector);
|
|
w[52] = __byte_perm_S (w[14], w[15], selector);
|
|
w[51] = __byte_perm_S (w[13], w[14], selector);
|
|
w[50] = __byte_perm_S (w[12], w[13], selector);
|
|
w[49] = __byte_perm_S (w[11], w[12], selector);
|
|
w[48] = __byte_perm_S (w[10], w[11], selector);
|
|
w[47] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[46] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[45] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[44] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[43] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[42] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[41] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[40] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[39] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[38] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[37] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = __byte_perm_S (w[24], w[25], selector);
|
|
w[62] = __byte_perm_S (w[23], w[24], selector);
|
|
w[61] = __byte_perm_S (w[22], w[23], selector);
|
|
w[60] = __byte_perm_S (w[21], w[22], selector);
|
|
w[59] = __byte_perm_S (w[20], w[21], selector);
|
|
w[58] = __byte_perm_S (w[19], w[20], selector);
|
|
w[57] = __byte_perm_S (w[18], w[19], selector);
|
|
w[56] = __byte_perm_S (w[17], w[18], selector);
|
|
w[55] = __byte_perm_S (w[16], w[17], selector);
|
|
w[54] = __byte_perm_S (w[15], w[16], selector);
|
|
w[53] = __byte_perm_S (w[14], w[15], selector);
|
|
w[52] = __byte_perm_S (w[13], w[14], selector);
|
|
w[51] = __byte_perm_S (w[12], w[13], selector);
|
|
w[50] = __byte_perm_S (w[11], w[12], selector);
|
|
w[49] = __byte_perm_S (w[10], w[11], selector);
|
|
w[48] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[47] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[46] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[45] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[44] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[43] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[42] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[41] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[40] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[39] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[38] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = __byte_perm_S (w[23], w[24], selector);
|
|
w[62] = __byte_perm_S (w[22], w[23], selector);
|
|
w[61] = __byte_perm_S (w[21], w[22], selector);
|
|
w[60] = __byte_perm_S (w[20], w[21], selector);
|
|
w[59] = __byte_perm_S (w[19], w[20], selector);
|
|
w[58] = __byte_perm_S (w[18], w[19], selector);
|
|
w[57] = __byte_perm_S (w[17], w[18], selector);
|
|
w[56] = __byte_perm_S (w[16], w[17], selector);
|
|
w[55] = __byte_perm_S (w[15], w[16], selector);
|
|
w[54] = __byte_perm_S (w[14], w[15], selector);
|
|
w[53] = __byte_perm_S (w[13], w[14], selector);
|
|
w[52] = __byte_perm_S (w[12], w[13], selector);
|
|
w[51] = __byte_perm_S (w[11], w[12], selector);
|
|
w[50] = __byte_perm_S (w[10], w[11], selector);
|
|
w[49] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[48] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[47] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[46] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[45] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[44] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[43] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[42] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[41] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[40] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[39] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = __byte_perm_S (w[22], w[23], selector);
|
|
w[62] = __byte_perm_S (w[21], w[22], selector);
|
|
w[61] = __byte_perm_S (w[20], w[21], selector);
|
|
w[60] = __byte_perm_S (w[19], w[20], selector);
|
|
w[59] = __byte_perm_S (w[18], w[19], selector);
|
|
w[58] = __byte_perm_S (w[17], w[18], selector);
|
|
w[57] = __byte_perm_S (w[16], w[17], selector);
|
|
w[56] = __byte_perm_S (w[15], w[16], selector);
|
|
w[55] = __byte_perm_S (w[14], w[15], selector);
|
|
w[54] = __byte_perm_S (w[13], w[14], selector);
|
|
w[53] = __byte_perm_S (w[12], w[13], selector);
|
|
w[52] = __byte_perm_S (w[11], w[12], selector);
|
|
w[51] = __byte_perm_S (w[10], w[11], selector);
|
|
w[50] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[49] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[48] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[47] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[46] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[45] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[44] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[43] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[42] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[41] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[40] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = __byte_perm_S (w[21], w[22], selector);
|
|
w[62] = __byte_perm_S (w[20], w[21], selector);
|
|
w[61] = __byte_perm_S (w[19], w[20], selector);
|
|
w[60] = __byte_perm_S (w[18], w[19], selector);
|
|
w[59] = __byte_perm_S (w[17], w[18], selector);
|
|
w[58] = __byte_perm_S (w[16], w[17], selector);
|
|
w[57] = __byte_perm_S (w[15], w[16], selector);
|
|
w[56] = __byte_perm_S (w[14], w[15], selector);
|
|
w[55] = __byte_perm_S (w[13], w[14], selector);
|
|
w[54] = __byte_perm_S (w[12], w[13], selector);
|
|
w[53] = __byte_perm_S (w[11], w[12], selector);
|
|
w[52] = __byte_perm_S (w[10], w[11], selector);
|
|
w[51] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[50] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[49] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[48] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[47] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[46] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[45] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[44] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[43] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[42] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[41] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = __byte_perm_S (w[20], w[21], selector);
|
|
w[62] = __byte_perm_S (w[19], w[20], selector);
|
|
w[61] = __byte_perm_S (w[18], w[19], selector);
|
|
w[60] = __byte_perm_S (w[17], w[18], selector);
|
|
w[59] = __byte_perm_S (w[16], w[17], selector);
|
|
w[58] = __byte_perm_S (w[15], w[16], selector);
|
|
w[57] = __byte_perm_S (w[14], w[15], selector);
|
|
w[56] = __byte_perm_S (w[13], w[14], selector);
|
|
w[55] = __byte_perm_S (w[12], w[13], selector);
|
|
w[54] = __byte_perm_S (w[11], w[12], selector);
|
|
w[53] = __byte_perm_S (w[10], w[11], selector);
|
|
w[52] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[51] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[50] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[49] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[48] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[47] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[46] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[45] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[44] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[43] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[42] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = __byte_perm_S (w[19], w[20], selector);
|
|
w[62] = __byte_perm_S (w[18], w[19], selector);
|
|
w[61] = __byte_perm_S (w[17], w[18], selector);
|
|
w[60] = __byte_perm_S (w[16], w[17], selector);
|
|
w[59] = __byte_perm_S (w[15], w[16], selector);
|
|
w[58] = __byte_perm_S (w[14], w[15], selector);
|
|
w[57] = __byte_perm_S (w[13], w[14], selector);
|
|
w[56] = __byte_perm_S (w[12], w[13], selector);
|
|
w[55] = __byte_perm_S (w[11], w[12], selector);
|
|
w[54] = __byte_perm_S (w[10], w[11], selector);
|
|
w[53] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[52] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[51] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[50] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[49] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[48] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[47] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[46] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[45] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[44] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[43] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = __byte_perm_S (w[18], w[19], selector);
|
|
w[62] = __byte_perm_S (w[17], w[18], selector);
|
|
w[61] = __byte_perm_S (w[16], w[17], selector);
|
|
w[60] = __byte_perm_S (w[15], w[16], selector);
|
|
w[59] = __byte_perm_S (w[14], w[15], selector);
|
|
w[58] = __byte_perm_S (w[13], w[14], selector);
|
|
w[57] = __byte_perm_S (w[12], w[13], selector);
|
|
w[56] = __byte_perm_S (w[11], w[12], selector);
|
|
w[55] = __byte_perm_S (w[10], w[11], selector);
|
|
w[54] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[53] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[52] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[51] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[50] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[49] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[48] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[47] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[46] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[45] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[44] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = __byte_perm_S (w[17], w[18], selector);
|
|
w[62] = __byte_perm_S (w[16], w[17], selector);
|
|
w[61] = __byte_perm_S (w[15], w[16], selector);
|
|
w[60] = __byte_perm_S (w[14], w[15], selector);
|
|
w[59] = __byte_perm_S (w[13], w[14], selector);
|
|
w[58] = __byte_perm_S (w[12], w[13], selector);
|
|
w[57] = __byte_perm_S (w[11], w[12], selector);
|
|
w[56] = __byte_perm_S (w[10], w[11], selector);
|
|
w[55] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[54] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[53] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[52] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[51] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[50] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[49] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[48] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[47] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[46] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[45] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = __byte_perm_S (w[16], w[17], selector);
|
|
w[62] = __byte_perm_S (w[15], w[16], selector);
|
|
w[61] = __byte_perm_S (w[14], w[15], selector);
|
|
w[60] = __byte_perm_S (w[13], w[14], selector);
|
|
w[59] = __byte_perm_S (w[12], w[13], selector);
|
|
w[58] = __byte_perm_S (w[11], w[12], selector);
|
|
w[57] = __byte_perm_S (w[10], w[11], selector);
|
|
w[56] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[55] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[54] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[53] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[52] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[51] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[50] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[49] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[48] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[47] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[46] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = __byte_perm_S (w[15], w[16], selector);
|
|
w[62] = __byte_perm_S (w[14], w[15], selector);
|
|
w[61] = __byte_perm_S (w[13], w[14], selector);
|
|
w[60] = __byte_perm_S (w[12], w[13], selector);
|
|
w[59] = __byte_perm_S (w[11], w[12], selector);
|
|
w[58] = __byte_perm_S (w[10], w[11], selector);
|
|
w[57] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[56] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[55] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[54] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[53] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[52] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[51] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[50] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[49] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[48] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[47] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = __byte_perm_S (w[14], w[15], selector);
|
|
w[62] = __byte_perm_S (w[13], w[14], selector);
|
|
w[61] = __byte_perm_S (w[12], w[13], selector);
|
|
w[60] = __byte_perm_S (w[11], w[12], selector);
|
|
w[59] = __byte_perm_S (w[10], w[11], selector);
|
|
w[58] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[57] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[56] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[55] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[54] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[53] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[52] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[51] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[50] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[49] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[48] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = __byte_perm_S (w[13], w[14], selector);
|
|
w[62] = __byte_perm_S (w[12], w[13], selector);
|
|
w[61] = __byte_perm_S (w[11], w[12], selector);
|
|
w[60] = __byte_perm_S (w[10], w[11], selector);
|
|
w[59] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[58] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[57] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[56] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[55] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[54] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[53] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[52] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[51] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[50] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[49] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = __byte_perm_S (w[12], w[13], selector);
|
|
w[62] = __byte_perm_S (w[11], w[12], selector);
|
|
w[61] = __byte_perm_S (w[10], w[11], selector);
|
|
w[60] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[59] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[58] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[57] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[56] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[55] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[54] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[53] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[52] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[51] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[50] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = __byte_perm_S (w[11], w[12], selector);
|
|
w[62] = __byte_perm_S (w[10], w[11], selector);
|
|
w[61] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[60] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[59] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[58] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[57] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[56] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[55] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[54] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[53] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[52] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[51] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = __byte_perm_S (w[10], w[11], selector);
|
|
w[62] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[61] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[60] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[59] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[58] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[57] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[56] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[55] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[54] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[53] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[52] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[62] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[61] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[60] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[59] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[58] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[57] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[56] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[55] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[54] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[53] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[62] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[61] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[60] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[59] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[58] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[57] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[56] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[55] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[54] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[62] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[61] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[60] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[59] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[58] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[57] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[56] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[55] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[62] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[61] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[60] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[59] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[58] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[57] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[56] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[62] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[61] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[60] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[59] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[58] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[57] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[62] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[61] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[60] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[59] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[58] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[62] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[61] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[60] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[59] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[62] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[61] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[60] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[62] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[61] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[62] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
 * Combinator amplifier: joins the password stored at pws[gid] with the
 * single candidate held in combs_buf[0] and writes the result to pws_amp[gid].
 *
 * pws        - input passwords, one per work-item
 * pws_amp    - output passwords, one per work-item
 * rules_buf  - not read by this kernel
 * combs_buf  - only element 0 is read
 * bfs_buf    - not read by this kernel
 * combs_mode - COMBINATOR_MODE_BASE_LEFT or COMBINATOR_MODE_BASE_RIGHT;
 *              decides which of the two buffers is moved before merging
 * gid_max    - number of valid work-items; ids at or above it do nothing
 */
__kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max)
{
  const u32 gid = get_global_id (0);

  // surplus work-items past the requested range have nothing to process
  if (gid >= gid_max) return;

  // work on local copies so the input buffers stay untouched
  pw_t base  = pws[gid];
  pw_t addon = combs_buf[0];

  // lengths are captured before either buffer gets modified
  const u32 base_len  = base.pw_len;
  const u32 addon_len = addon.pw_len;

  // base word goes first: move the combs word up by the base length
  if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
  {
    switch_buffer_by_offset_1x64_le_S (addon.i, base_len);
  }

  // combs word goes first: move the base word up by the combs length
  if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
  {
    switch_buffer_by_offset_1x64_le_S (base.i, addon_len);
  }

  // merge both 64-word buffers with a bitwise OR, word by word
  #pragma unroll
  for (int idx = 0; idx < 64; idx++)
  {
    base.i[idx] = base.i[idx] | addon.i[idx];
  }

  base.pw_len = base_len + addon_len;

  pws_amp[gid] = base;
}
|