From bbcaf25d4d2130fa9c34c314628f9fd2f706b61b Mon Sep 17 00:00:00 2001 From: James Almer Date: Mon, 9 Sep 2013 20:16:40 -0300 Subject: [PATCH] lavu/sha512: Fully unroll the transform function loops crypto_bench SHA-512 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.7.3 x86_64 Before: lavu SHA-512 size: 1048576 runs: 1024 time: 12.737 +- 0.147 After: lavu SHA-512 size: 1048576 runs: 1024 time: 11.670 +- 0.173 Signed-off-by: James Almer Signed-off-by: Michael Niedermayer --- libavutil/sha512.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/libavutil/sha512.c b/libavutil/sha512.c index 84136037db..66a864f1a6 100644 --- a/libavutil/sha512.c +++ b/libavutil/sha512.c @@ -150,27 +150,32 @@ static void sha512_transform(uint64_t *state, const uint8_t buffer[128]) a = T1 + T2; } #else - for (i = 0; i < 16 - 7;) { - ROUND512_0_TO_15(a, b, c, d, e, f, g, h); - ROUND512_0_TO_15(h, a, b, c, d, e, f, g); - ROUND512_0_TO_15(g, h, a, b, c, d, e, f); - ROUND512_0_TO_15(f, g, h, a, b, c, d, e); - ROUND512_0_TO_15(e, f, g, h, a, b, c, d); - ROUND512_0_TO_15(d, e, f, g, h, a, b, c); - ROUND512_0_TO_15(c, d, e, f, g, h, a, b); - ROUND512_0_TO_15(b, c, d, e, f, g, h, a); - } - for (; i < 80 - 7;) { - ROUND512_16_TO_80(a, b, c, d, e, f, g, h); - ROUND512_16_TO_80(h, a, b, c, d, e, f, g); - ROUND512_16_TO_80(g, h, a, b, c, d, e, f); - ROUND512_16_TO_80(f, g, h, a, b, c, d, e); - ROUND512_16_TO_80(e, f, g, h, a, b, c, d); - ROUND512_16_TO_80(d, e, f, g, h, a, b, c); - ROUND512_16_TO_80(c, d, e, f, g, h, a, b); - ROUND512_16_TO_80(b, c, d, e, f, g, h, a); - } +#define R512_0 \ + ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \ + ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \ + ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \ + ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \ + ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \ + ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \ + ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \ + ROUND512_0_TO_15(b, c, d, e, f, g, h, a) + + i = 0; + R512_0; R512_0; + +#define R512_16 \ + ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \ + ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \ + ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \ + ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \ + ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \ + ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \ + ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \ + ROUND512_16_TO_80(b, c, d, e, f, g, h, a) + + R512_16; R512_16; R512_16; R512_16; + R512_16; R512_16; R512_16; R512_16; #endif state[0] += a; state[1] += b;