diff --git a/OpenCL/rp.c b/OpenCL/rp.c index 741de4055..d702956d3 100644 --- a/OpenCL/rp.c +++ b/OpenCL/rp.c @@ -175,293 +175,294 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 #ifdef IS_NV switch (num) { - case 0: out1[3] = in1[3]; - out1[2] = in1[2]; - out1[1] = in1[1]; - out1[0] = in1[0]; - out0[3] = in0[3]; - out0[2] = in0[2]; + case 0: out0[0] = in0[0]; out0[1] = in0[1]; - out0[0] = in0[0]; + out0[2] = in0[2]; + out0[3] = in0[3]; + out1[0] = in1[0]; + out1[1] = in1[1]; + out1[2] = in1[2]; + out1[3] = in1[3]; break; - case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm (in0[0], in0[1], 0x6543); - out0[0] = __byte_perm ( 0, in0[0], 0x6543); + case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321); + out0[1] = __byte_perm (in0[1], in0[2], 0x4321); + out0[2] = __byte_perm (in0[2], in0[3], 0x4321); + out0[3] = __byte_perm (in0[3], in1[0], 0x4321); + out1[0] = __byte_perm (in1[0], in1[1], 0x4321); + out1[1] = __byte_perm (in1[1], in1[2], 0x4321); + out1[2] = __byte_perm (in1[2], in1[3], 0x4321); + out1[3] = __byte_perm (in1[3], 0, 0x4321); break; - case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm (in0[0], in0[1], 0x5432); - out0[0] = __byte_perm ( 0, in0[0], 0x5432); + case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432); + out0[1] = __byte_perm (in0[1], in0[2], 0x5432); + out0[2] = __byte_perm (in0[2], in0[3], 0x5432); + out0[3] = __byte_perm (in0[3], in1[0], 0x5432); + out1[0] = __byte_perm (in1[0], in1[1], 0x5432); + out1[1] = __byte_perm (in1[1], in1[2], 0x5432); + out1[2] = __byte_perm (in1[2], in1[3], 0x5432); + out1[3] = __byte_perm (in1[3], 0, 0x5432); break; - case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm (in0[0], in0[1], 0x4321); - out0[0] = __byte_perm ( 0, in0[0], 0x4321); + case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543); + out0[1] = __byte_perm (in0[1], in0[2], 0x6543); + out0[2] = __byte_perm (in0[2], in0[3], 0x6543); + out0[3] = __byte_perm (in0[3], in1[0], 0x6543); + out1[0] = __byte_perm (in1[0], in1[1], 0x6543); + out1[1] = __byte_perm (in1[1], in1[2], 0x6543); + out1[2] = __byte_perm (in1[2], in1[3], 0x6543); + out1[3] = __byte_perm (in1[3], 0, 0x6543); break; - case 4: out1[3] = in1[2]; - out1[2] = in1[1]; - out1[1] = in1[0]; - out1[0] = in0[3]; - out0[3] = in0[2]; - out0[2] = in0[1]; - out0[1] = in0[0]; - out0[0] = 0; + case 4: out0[0] = in0[1]; + out0[1] = in0[2]; + out0[2] = in0[3]; + out0[3] = in1[0]; + out1[0] = in1[1]; + out1[1] = in1[2]; + out1[2] = in1[3]; + out1[3] = 0; break; - case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm (in0[1], in0[2], 0x6543); - out0[2] = __byte_perm (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm ( 0, in0[0], 0x6543); - out0[0] = 0; + case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321); + out0[1] = __byte_perm (in0[2], in0[3], 0x4321); + out0[2] = __byte_perm (in0[3], in1[0], 0x4321); + out0[3] = __byte_perm (in1[0], in1[1], 0x4321); + out1[0] = __byte_perm (in1[1], in1[2], 0x4321); + out1[1] = __byte_perm (in1[2], in1[3], 0x4321); + out1[2] = __byte_perm (in1[3], 0, 0x4321); + out1[3] = 0; break; - case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm (in0[2], in0[3], 0x5432); - out0[3] = __byte_perm (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm ( 0, in0[0], 0x5432); - out0[0] = 0; + case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432); + out0[1] = __byte_perm (in0[2], in0[3], 0x5432); + out0[2] = __byte_perm (in0[3], in1[0], 0x5432); + out0[3] = __byte_perm (in1[0], in1[1], 0x5432); + out1[0] = __byte_perm (in1[1], in1[2], 0x5432); + out1[1] = __byte_perm (in1[2], in1[3], 0x5432); + out1[2] = __byte_perm (in1[3], 0, 0x5432); + out1[3] = 0; break; - case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm ( 0, in0[0], 0x4321); - out0[0] = 0; + case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543); + out0[1] = __byte_perm (in0[2], in0[3], 0x6543); + out0[2] = __byte_perm (in0[3], in1[0], 0x6543); + out0[3] = __byte_perm (in1[0], in1[1], 0x6543); + out1[0] = __byte_perm (in1[1], in1[2], 0x6543); + out1[1] = __byte_perm (in1[2], in1[3], 0x6543); + out1[2] = __byte_perm (in1[3], 0, 0x6543); + out1[3] = 0; break; - case 8: out1[3] = in1[1]; - out1[2] = in1[0]; - out1[1] = in0[3]; - out1[0] = in0[2]; - out0[3] = in0[1]; - out0[2] = in0[0]; - out0[1] = 0; - out0[0] = 0; - break; - case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543); - out1[2] = __byte_perm (in0[3], in1[0], 0x6543); - out1[1] = __byte_perm (in0[2], in0[3], 0x6543); - out1[0] = __byte_perm (in0[1], in0[2], 0x6543); - out0[3] = __byte_perm (in0[0], in0[1], 0x6543); - out0[2] = __byte_perm ( 0, in0[0], 0x6543); - out0[1] = 0; - out0[0] = 0; - break; - case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432); - out1[2] = __byte_perm (in0[3], in1[0], 0x5432); - out1[1] = __byte_perm (in0[2], in0[3], 0x5432); - out1[0] = __byte_perm (in0[1], in0[2], 0x5432); - out0[3] = __byte_perm (in0[0], in0[1], 0x5432); - out0[2] = __byte_perm ( 0, in0[0], 0x5432); - out0[1] = 0; - out0[0] = 0; - break; - case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321); - out1[2] = __byte_perm (in0[3], in1[0], 0x4321); - out1[1] = __byte_perm (in0[2], in0[3], 0x4321); - out1[0] = __byte_perm (in0[1], in0[2], 0x4321); - out0[3] = __byte_perm (in0[0], in0[1], 0x4321); - out0[2] = __byte_perm ( 0, in0[0], 0x4321); - out0[1] = 0; - out0[0] = 0; - break; - case 12: out1[3] = in1[0]; - out1[2] = in0[3]; - out1[1] = in0[2]; - out1[0] = in0[1]; - out0[3] = in0[0]; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543); - out1[2] = __byte_perm (in0[2], in0[3], 0x6543); - out1[1] = __byte_perm (in0[1], in0[2], 0x6543); - out1[0] = __byte_perm (in0[0], in0[1], 0x6543); - out0[3] = __byte_perm ( 0, in0[0], 0x6543); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432); - out1[2] = __byte_perm (in0[2], in0[3], 0x5432); - out1[1] = __byte_perm (in0[1], in0[2], 0x5432); - out1[0] = __byte_perm (in0[0], in0[1], 0x5432); - out0[3] = __byte_perm ( 0, in0[0], 0x5432); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321); - out1[2] = __byte_perm (in0[2], in0[3], 0x4321); - out1[1] = __byte_perm (in0[1], in0[2], 0x4321); - out1[0] = __byte_perm (in0[0], in0[1], 0x4321); - out0[3] = __byte_perm ( 0, in0[0], 0x4321); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 16: out1[3] = in0[3]; - out1[2] = in0[2]; - out1[1] = in0[1]; - out1[0] = in0[0]; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543); - out1[2] = __byte_perm (in0[1], in0[2], 0x6543); - out1[1] = __byte_perm (in0[0], in0[1], 0x6543); - out1[0] = __byte_perm ( 0, in0[0], 0x6543); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432); - out1[2] = __byte_perm (in0[1], in0[2], 0x5432); - out1[1] = __byte_perm (in0[0], in0[1], 0x5432); - out1[0] = __byte_perm ( 0, in0[0], 0x5432); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321); - out1[2] = __byte_perm (in0[1], in0[2], 0x4321); - out1[1] = __byte_perm (in0[0], in0[1], 0x4321); - out1[0] = __byte_perm ( 0, in0[0], 0x4321); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 20: out1[3] = in0[2]; - out1[2] = in0[1]; - out1[1] = in0[0]; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543); - out1[2] = __byte_perm (in0[0], in0[1], 0x6543); - out1[1] = __byte_perm ( 0, in0[0], 0x6543); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432); - out1[2] = __byte_perm (in0[0], in0[1], 0x5432); - out1[1] = __byte_perm ( 0, in0[0], 0x5432); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321); - out1[2] = __byte_perm (in0[0], in0[1], 0x4321); - out1[1] = __byte_perm ( 0, in0[0], 0x4321); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 24: out1[3] = in0[1]; - out1[2] = in0[0]; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543); - out1[2] = __byte_perm ( 0, in0[0], 0x6543); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432); - out1[2] = __byte_perm ( 0, in0[0], 0x5432); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321); - out1[2] = __byte_perm ( 0, in0[0], 0x4321); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 28: out1[3] = in0[0]; + case 8: out0[0] = in0[2]; + out0[1] = in0[3]; + out0[2] = in1[0]; + out0[3] = in1[1]; + out1[0] = in1[2]; + out1[1] = in1[3]; out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[3] = 0; break; - case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543); + case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321); + out0[1] = __byte_perm (in0[3], in1[0], 0x4321); + out0[2] = __byte_perm (in1[0], in1[1], 0x4321); + out0[3] = __byte_perm (in1[1], in1[2], 0x4321); + out1[0] = __byte_perm (in1[2], in1[3], 0x4321); + out1[1] = __byte_perm (in1[3], 0, 0x4321); out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[3] = 0; break; - case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432); + case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432); + out0[1] = __byte_perm (in0[3], in1[0], 0x5432); + out0[2] = __byte_perm (in1[0], in1[1], 0x5432); + out0[3] = __byte_perm (in1[1], in1[2], 0x5432); + out1[0] = __byte_perm (in1[2], in1[3], 0x5432); + out1[1] = __byte_perm (in1[3], 0, 0x5432); out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[3] = 0; break; - case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321); + case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543); + out0[1] = __byte_perm (in0[3], in1[0], 0x6543); + out0[2] = __byte_perm (in1[0], in1[1], 0x6543); + out0[3] = __byte_perm (in1[1], in1[2], 0x6543); + out1[0] = __byte_perm (in1[2], in1[3], 0x6543); + out1[1] = __byte_perm (in1[3], 0, 0x6543); out1[2] = 0; + out1[3] = 0; + break; + case 12: out0[0] = in0[3]; + out0[1] = in1[0]; + out0[2] = in1[1]; + out0[3] = in1[2]; + out1[0] = in1[3]; out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 13: + out0[0] = __byte_perm (in0[3], in1[0], 0x4321); + out0[1] = __byte_perm (in1[0], in1[1], 0x4321); + out0[2] = __byte_perm (in1[1], in1[2], 0x4321); + out0[3] = __byte_perm (in1[2], in1[3], 0x4321); + out1[0] = __byte_perm (in1[3], 0, 0x4321); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432); + out0[1] = __byte_perm (in1[0], in1[1], 0x5432); + out0[2] = __byte_perm (in1[1], in1[2], 0x5432); + out0[3] = __byte_perm (in1[2], in1[3], 0x5432); + out1[0] = __byte_perm (in1[3], 0, 0x5432); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543); + out0[1] = __byte_perm (in1[0], in1[1], 0x6543); + out0[2] = __byte_perm (in1[1], in1[2], 0x6543); + out0[3] = __byte_perm (in1[2], in1[3], 0x6543); + out1[0] = __byte_perm (in1[3], 0, 0x6543); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 16: out0[0] = in1[0]; + out0[1] = in1[1]; + out0[2] = in1[2]; + out0[3] = in1[3]; out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321); + out0[1] = __byte_perm (in1[1], in1[2], 0x4321); + out0[2] = __byte_perm (in1[2], in1[3], 0x4321); + out0[3] = __byte_perm (in1[3], 0, 0x4321); + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432); + out0[1] = __byte_perm (in1[1], in1[2], 0x5432); + out0[2] = __byte_perm (in1[2], in1[3], 0x5432); + out0[3] = __byte_perm (in1[3], 0, 0x5432); + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543); + out0[1] = __byte_perm (in1[1], in1[2], 0x6543); + out0[2] = __byte_perm (in1[2], in1[3], 0x6543); + out0[3] = __byte_perm (in1[3], 0, 0x6543); + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 20: out0[0] = in1[1]; + out0[1] = in1[2]; + out0[2] = in1[3]; out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321); + out0[1] = __byte_perm (in1[2], in1[3], 0x4321); + out0[2] = __byte_perm (in1[3], 0, 0x4321); + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432); + out0[1] = __byte_perm (in1[2], in1[3], 0x5432); + out0[2] = __byte_perm (in1[3], 0, 0x5432); + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543); + out0[1] = __byte_perm (in1[2], in1[3], 0x6543); + out0[2] = __byte_perm (in1[3], 0, 0x6543); + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 24: out0[0] = in1[2]; + out0[1] = in1[3]; out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321); + out0[1] = __byte_perm (in1[3], 0, 0x4321); + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432); + out0[1] = __byte_perm (in1[3], 0, 0x5432); + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543); + out0[1] = __byte_perm (in1[3], 0, 0x6543); + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 28: out0[0] = in1[3]; out0[1] = 0; - out0[0] = 0; + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321); + out0[1] = 0; + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432); + out0[1] = 0; + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543); + out0[1] = 0; + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; } #endif @@ -766,294 +767,293 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 #ifdef IS_NV switch (num) { - case 0: out0[0] = in0[0]; - out0[1] = in0[1]; - out0[2] = in0[2]; - out0[3] = in0[3]; - out1[0] = in1[0]; - out1[1] = in1[1]; + case 0: out1[3] = in1[3]; out1[2] = in1[2]; - out1[3] = in1[3]; + out1[1] = in1[1]; + out1[0] = in1[0]; + out0[3] = in0[3]; + out0[2] = in0[2]; + out0[1] = in0[1]; + out0[0] = in0[0]; break; - case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm (in1[2], in1[3], 0x4321); - out1[3] = __byte_perm (in1[3], 0, 0x4321); + case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543); + out1[2] = __byte_perm (in1[1], in1[2], 0x6543); + out1[1] = __byte_perm (in1[0], in1[1], 0x6543); + out1[0] = __byte_perm (in0[3], in1[0], 0x6543); + out0[3] = __byte_perm (in0[2], in0[3], 0x6543); + out0[2] = __byte_perm (in0[1], in0[2], 0x6543); + out0[1] = __byte_perm (in0[0], in0[1], 0x6543); + out0[0] = __byte_perm ( 0, in0[0], 0x6543); break; - case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm (in0[2], in0[3], 0x5432); - out0[3] = __byte_perm (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm (in1[2], in1[3], 0x5432); - out1[3] = __byte_perm (in1[3], 0, 0x5432); + case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432); + out1[2] = __byte_perm (in1[1], in1[2], 0x5432); + out1[1] = __byte_perm (in1[0], in1[1], 0x5432); + out1[0] = __byte_perm (in0[3], in1[0], 0x5432); + out0[3] = __byte_perm (in0[2], in0[3], 0x5432); + out0[2] = __byte_perm (in0[1], in0[2], 0x5432); + out0[1] = __byte_perm (in0[0], in0[1], 0x5432); + out0[0] = __byte_perm ( 0, in0[0], 0x5432); break; - case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm (in0[1], in0[2], 0x6543); - out0[2] = __byte_perm (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm (in1[2], in1[3], 0x6543); - out1[3] = __byte_perm (in1[3], 0, 0x6543); + case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321); + out1[2] = __byte_perm (in1[1], in1[2], 0x4321); + out1[1] = __byte_perm (in1[0], in1[1], 0x4321); + out1[0] = __byte_perm (in0[3], in1[0], 0x4321); + out0[3] = __byte_perm (in0[2], in0[3], 0x4321); + out0[2] = __byte_perm (in0[1], in0[2], 0x4321); + out0[1] = __byte_perm (in0[0], in0[1], 0x4321); + out0[0] = __byte_perm ( 0, in0[0], 0x4321); break; - case 4: out0[0] = in0[1]; - out0[1] = in0[2]; - out0[2] = in0[3]; - out0[3] = in1[0]; - out1[0] = in1[1]; - out1[1] = in1[2]; - out1[2] = in1[3]; - out1[3] = 0; + case 4: out1[3] = in1[2]; + out1[2] = in1[1]; + out1[1] = in1[0]; + out1[0] = in0[3]; + out0[3] = in0[2]; + out0[2] = in0[1]; + out0[1] = in0[0]; + out0[0] = 0; break; - case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm (in1[3], 0, 0x4321); - out1[3] = 0; + case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543); + out1[2] = __byte_perm (in1[0], in1[1], 0x6543); + out1[1] = __byte_perm (in0[3], in1[0], 0x6543); + out1[0] = __byte_perm (in0[2], in0[3], 0x6543); + out0[3] = __byte_perm (in0[1], in0[2], 0x6543); + out0[2] = __byte_perm (in0[0], in0[1], 0x6543); + out0[1] = __byte_perm ( 0, in0[0], 0x6543); + out0[0] = 0; break; - case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm (in1[3], 0, 0x5432); - out1[3] = 0; + case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432); + out1[2] = __byte_perm (in1[0], in1[1], 0x5432); + out1[1] = __byte_perm (in0[3], in1[0], 0x5432); + out1[0] = __byte_perm (in0[2], in0[3], 0x5432); + out0[3] = __byte_perm (in0[1], in0[2], 0x5432); + out0[2] = __byte_perm (in0[0], in0[1], 0x5432); + out0[1] = __byte_perm ( 0, in0[0], 0x5432); + out0[0] = 0; break; - case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm (in1[3], 0, 0x6543); - out1[3] = 0; + case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321); + out1[2] = __byte_perm (in1[0], in1[1], 0x4321); + out1[1] = __byte_perm (in0[3], in1[0], 0x4321); + out1[0] = __byte_perm (in0[2], in0[3], 0x4321); + out0[3] = __byte_perm (in0[1], in0[2], 0x4321); + out0[2] = __byte_perm (in0[0], in0[1], 0x4321); + out0[1] = __byte_perm ( 0, in0[0], 0x4321); + out0[0] = 0; break; - case 8: out0[0] = in0[2]; - out0[1] = in0[3]; - out0[2] = in1[0]; - out0[3] = in1[1]; - out1[0] = in1[2]; - out1[1] = in1[3]; - out1[2] = 0; - out1[3] = 0; - break; - case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321); - out0[1] = __byte_perm (in0[3], in1[0], 0x4321); - out0[2] = __byte_perm (in1[0], in1[1], 0x4321); - out0[3] = __byte_perm (in1[1], in1[2], 0x4321); - out1[0] = __byte_perm (in1[2], in1[3], 0x4321); - out1[1] = __byte_perm (in1[3], 0, 0x4321); - out1[2] = 0; - out1[3] = 0; - break; - case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432); - out0[1] = __byte_perm (in0[3], in1[0], 0x5432); - out0[2] = __byte_perm (in1[0], in1[1], 0x5432); - out0[3] = __byte_perm (in1[1], in1[2], 0x5432); - out1[0] = __byte_perm (in1[2], in1[3], 0x5432); - out1[1] = __byte_perm (in1[3], 0, 0x5432); - out1[2] = 0; - out1[3] = 0; - break; - case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543); - out0[1] = __byte_perm (in0[3], in1[0], 0x6543); - out0[2] = __byte_perm (in1[0], in1[1], 0x6543); - out0[3] = __byte_perm (in1[1], in1[2], 0x6543); - out1[0] = __byte_perm (in1[2], in1[3], 0x6543); - out1[1] = __byte_perm (in1[3], 0, 0x6543); - out1[2] = 0; - out1[3] = 0; - break; - case 12: out0[0] = in0[3]; - out0[1] = in1[0]; - out0[2] = in1[1]; - out0[3] = in1[2]; - out1[0] = in1[3]; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 13: - out0[0] = __byte_perm (in0[3], in1[0], 0x4321); - out0[1] = __byte_perm (in1[0], in1[1], 0x4321); - out0[2] = __byte_perm (in1[1], in1[2], 0x4321); - out0[3] = __byte_perm (in1[2], in1[3], 0x4321); - out1[0] = __byte_perm (in1[3], 0, 0x4321); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432); - out0[1] = __byte_perm (in1[0], in1[1], 0x5432); - out0[2] = __byte_perm (in1[1], in1[2], 0x5432); - out0[3] = __byte_perm (in1[2], in1[3], 0x5432); - out1[0] = __byte_perm (in1[3], 0, 0x5432); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543); - out0[1] = __byte_perm (in1[0], in1[1], 0x6543); - out0[2] = __byte_perm (in1[1], in1[2], 0x6543); - out0[3] = __byte_perm (in1[2], in1[3], 0x6543); - out1[0] = __byte_perm (in1[3], 0, 0x6543); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 16: out0[0] = in1[0]; - out0[1] = in1[1]; - out0[2] = in1[2]; - out0[3] = in1[3]; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321); - out0[1] = __byte_perm (in1[1], in1[2], 0x4321); - out0[2] = __byte_perm (in1[2], in1[3], 0x4321); - out0[3] = __byte_perm (in1[3], 0, 0x4321); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432); - out0[1] = __byte_perm (in1[1], in1[2], 0x5432); - out0[2] = __byte_perm (in1[2], in1[3], 0x5432); - out0[3] = __byte_perm (in1[3], 0, 0x5432); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543); - out0[1] = __byte_perm (in1[1], in1[2], 0x6543); - out0[2] = __byte_perm (in1[2], in1[3], 0x6543); - out0[3] = __byte_perm (in1[3], 0, 0x6543); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 20: out0[0] = in1[1]; - out0[1] = in1[2]; - out0[2] = in1[3]; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321); - out0[1] = __byte_perm (in1[2], in1[3], 0x4321); - out0[2] = __byte_perm (in1[3], 0, 0x4321); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432); - out0[1] = __byte_perm (in1[2], in1[3], 0x5432); - out0[2] = __byte_perm (in1[3], 0, 0x5432); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543); - out0[1] = __byte_perm (in1[2], in1[3], 0x6543); - out0[2] = __byte_perm (in1[3], 0, 0x6543); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 24: out0[0] = in1[2]; - out0[1] = in1[3]; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321); - out0[1] = __byte_perm (in1[3], 0, 0x4321); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432); - out0[1] = __byte_perm (in1[3], 0, 0x5432); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543); - out0[1] = __byte_perm (in1[3], 0, 0x6543); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 28: out0[0] = in1[3]; + case 8: out1[3] = in1[1]; + out1[2] = in1[0]; + out1[1] = in0[3]; + out1[0] = in0[2]; + out0[3] = in0[1]; + out0[2] = in0[0]; out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[0] = 0; break; - case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321); + case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543); + out1[2] = __byte_perm (in0[3], in1[0], 0x6543); + out1[1] = __byte_perm (in0[2], in0[3], 0x6543); + out1[0] = __byte_perm (in0[1], in0[2], 0x6543); + out0[3] = __byte_perm (in0[0], in0[1], 0x6543); + out0[2] = __byte_perm ( 0, in0[0], 0x6543); out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[0] = 0; break; - case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432); + case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432); + out1[2] = __byte_perm (in0[3], in1[0], 0x5432); + out1[1] = __byte_perm (in0[2], in0[3], 0x5432); + out1[0] = __byte_perm (in0[1], in0[2], 0x5432); + out0[3] = __byte_perm (in0[0], in0[1], 0x5432); + out0[2] = __byte_perm ( 0, in0[0], 0x5432); out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[0] = 0; break; - case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543); + case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321); + out1[2] = __byte_perm (in0[3], in1[0], 0x4321); + out1[1] = __byte_perm (in0[2], in0[3], 0x4321); + out1[0] = __byte_perm (in0[1], in0[2], 0x4321); + out0[3] = __byte_perm (in0[0], in0[1], 0x4321); + out0[2] = __byte_perm ( 0, in0[0], 0x4321); out0[1] = 0; + out0[0] = 0; + break; + case 12: out1[3] = in1[0]; + out1[2] = in0[3]; + out1[1] = in0[2]; + out1[0] = in0[1]; + out0[3] = in0[0]; out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543); + out1[2] = __byte_perm (in0[2], in0[3], 0x6543); + out1[1] = __byte_perm (in0[1], in0[2], 0x6543); + out1[0] = __byte_perm (in0[0], in0[1], 0x6543); + out0[3] = __byte_perm ( 0, in0[0], 0x6543); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432); + out1[2] = __byte_perm (in0[2], in0[3], 0x5432); + out1[1] = __byte_perm (in0[1], in0[2], 0x5432); + out1[0] = __byte_perm (in0[0], in0[1], 0x5432); + out0[3] = __byte_perm ( 0, in0[0], 0x5432); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321); + out1[2] = __byte_perm (in0[2], in0[3], 0x4321); + out1[1] = __byte_perm (in0[1], in0[2], 0x4321); + out1[0] = __byte_perm (in0[0], in0[1], 0x4321); + out0[3] = __byte_perm ( 0, in0[0], 0x4321); + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 16: out1[3] = in0[3]; + out1[2] = in0[2]; + out1[1] = in0[1]; + out1[0] = in0[0]; out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543); + out1[2] = __byte_perm (in0[1], in0[2], 0x6543); + out1[1] = __byte_perm (in0[0], in0[1], 0x6543); + out1[0] = __byte_perm ( 0, in0[0], 0x6543); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432); + out1[2] = __byte_perm (in0[1], in0[2], 0x5432); + out1[1] = __byte_perm (in0[0], in0[1], 0x5432); + out1[0] = __byte_perm ( 0, in0[0], 0x5432); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321); + out1[2] = __byte_perm (in0[1], in0[2], 0x4321); + out1[1] = __byte_perm (in0[0], in0[1], 0x4321); + out1[0] = __byte_perm ( 0, in0[0], 0x4321); + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 20: out1[3] = in0[2]; + out1[2] = in0[1]; + out1[1] = in0[0]; out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543); + out1[2] = __byte_perm (in0[0], in0[1], 0x6543); + out1[1] = __byte_perm ( 0, in0[0], 0x6543); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432); + out1[2] = __byte_perm (in0[0], in0[1], 0x5432); + out1[1] = __byte_perm ( 0, in0[0], 0x5432); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321); + out1[2] = __byte_perm (in0[0], in0[1], 0x4321); + out1[1] = __byte_perm ( 0, in0[0], 0x4321); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 24: out1[3] = in0[1]; + out1[2] = in0[0]; out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543); + out1[2] = __byte_perm ( 0, in0[0], 0x6543); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432); + out1[2] = __byte_perm ( 0, in0[0], 0x5432); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321); + out1[2] = __byte_perm ( 0, in0[0], 0x4321); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 28: out1[3] = in0[0]; out1[2] = 0; - out1[3] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321); + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; break; } #endif