mirror of
https://github.com/hashcat/hashcat
synced 2024-11-28 05:21:38 +01:00
1885 lines
43 KiB
Common Lisp
1885 lines
43 KiB
Common Lisp
/**
|
|
* Author......: See docs/credits.txt
|
|
* License.....: MIT
|
|
*
|
|
* Furthermore, since elliptic curve operations are highly researched and optimized,
|
|
* we've consulted a lot of online resources to implement this, including several papers and
|
|
* example code.
|
|
*
|
|
* Credits where credits are due: there are a lot of nice projects that explain and/or optimize
|
|
* elliptic curve operations (especially elliptic curve multiplications by a scalar).
|
|
*
|
|
* We want to shout out following projects, which were quite helpful when implementing this:
|
|
* - secp256k1 by Pieter Wuille (https://github.com/bitcoin-core/secp256k1/, MIT)
|
|
* - secp256k1-cl by hhanh00 (https://github.com/hhanh00/secp256k1-cl/, MIT)
|
|
* - ec_pure_c by masterzorag (https://github.com/masterzorag/ec_pure_c/)
|
|
* - ecc-gmp by leivaburto (https://github.com/leivaburto/ecc-gmp)
|
|
* - micro-ecc by Ken MacKay (https://github.com/kmackay/micro-ecc/, BSD)
|
|
* - curve_example by willem (https://gist.github.com/nlitsme/c9031c7b9bf6bb009e5a)
|
|
* - py_ecc by Vitalik Buterin (https://github.com/ethereum/py_ecc/, MIT)
|
|
*
|
|
*
|
|
* Some BigNum operations are implemented similar to micro-ecc which is licensed under these terms:
|
|
* Copyright 2014 Ken MacKay, 2-Clause BSD License
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
|
* provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
|
* conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of
|
|
* conditions and the following disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
|
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* ATTENTION: this code is NOT meant to be used in security critical environments that are at risk
|
|
* of side-channel or timing attacks etc, it's only purpose is to make it work fast for GPGPU
|
|
* (OpenCL/CUDA). Some attack vectors like side-channel and timing-attacks might be possible,
|
|
* because of some optimizations used within this code (non-constant time etc).
|
|
*/
|
|
|
|
/*
|
|
* Implementation considerations:
|
|
* point double and point add are implemented similar to algorithms mentioned in this 2011 paper:
|
|
* http://eprint.iacr.org/2011/338.pdf
|
|
* (Fast and Regular Algorithms for Scalar Multiplication over Elliptic Curves by Matthieu Rivain)
|
|
*
|
|
* In theory we could use the Jacobian Co-Z enhancement to get rid of the larger buffer caused by
|
|
* the z coordinates (and in this way reduce register pressure etc).
|
|
* For the Co-Z improvement there are a lot of fast algorithms, but we might still be faster
|
|
* with this implementation (b/c we allow non-constant time) without the Brier/Joye Montgomery-like
|
|
* ladder. Of course, this claim would need to be verified and tested to see which one is faster
|
|
* for our specific scenario at the end.
|
|
*
|
|
* A speedup could also be possible by using scalars converted to (w)NAF (non-adjacent form) or by
|
|
* just using the windowed (precomputed zi) method or similar improvements:
|
|
* The general idea of w-NAF would be to pre-compute some zi coefficients like below to reduce the
|
|
* costly point additions by using a non-binary ("signed") number system (values other than just
|
|
* 0 and 1, but ranging from -2^(w-1)-1 to 2^(w-1)-1). This would work best with the left-to-right
|
|
* binary algorithm such that we could just add zi * P when adding point P (pre-compute all the
|
|
* possible zi * P values because the x/y coordinates are known before the kernel starts):
|
|
*
|
|
* // Example with window size w = 2 (i.e. mod 4 => & 3):
|
|
* // 173 => 1 0 -1 0 -1 0 -1 0 1 = 2^8 - 2^6 - 2^4 - 2^2 + 1
|
|
* int e = 0b10101101; // 173
|
|
* int z[8 + 1] = { 0 }; // our zi/di, we need one extra slot to make the subtraction work
|
|
*
|
|
* int i = 0;
|
|
*
|
|
* while (e)
|
|
* {
|
|
* if (e & 1)
|
|
* {
|
|
* // for window size w = 3 it would be:
|
|
* // => 2^(w-0) = 2^3 = 8
|
|
* // => 2^(w-1) = 2^2 = 4
|
|
*
|
|
* int bit; // = 2 - (e & 3) for w = 2
|
|
*
|
|
* if ((e & 3) >= 2) // e % 4 == e & 3, use (e & 7) >= 4 for w = 3
|
|
* bit = (e & 3) - 4; // (e & 7) - 8 for w = 3
|
|
* else
|
|
* bit = e & 3; // e & 7 for w = 3
|
|
*
|
|
* z[i] = bit;
|
|
* e -= bit;
|
|
* }
|
|
*
|
|
* e >>= 1; // e / 2
|
|
* i++;
|
|
* }
|
|
*/
|
|
|
|
#include "inc_ecc_secp256k1.h"
|
|
|
|
DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8])
|
|
{
|
|
u32 c = 0; // carry/borrow
|
|
|
|
#if defined IS_NV && HAS_SUB == 1 && HAS_SUBC == 1
|
|
asm volatile
|
|
(
|
|
"sub.cc.u32 %0, %9, %17;"
|
|
"subc.cc.u32 %1, %10, %18;"
|
|
"subc.cc.u32 %2, %11, %19;"
|
|
"subc.cc.u32 %3, %12, %20;"
|
|
"subc.cc.u32 %4, %13, %21;"
|
|
"subc.cc.u32 %5, %14, %22;"
|
|
"subc.cc.u32 %6, %15, %23;"
|
|
"subc.cc.u32 %7, %16, %24;"
|
|
"subc.u32 %8, 0, 0;"
|
|
: "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]),
|
|
"=r"(c)
|
|
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
|
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
|
);
|
|
#elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1
|
|
__asm__ __volatile__
|
|
(
|
|
"V_SUB_U32 %0, %9, %17;"
|
|
"V_SUBB_U32 %1, %10, %18;"
|
|
"V_SUBB_U32 %2, %11, %19;"
|
|
"V_SUBB_U32 %3, %12, %20;"
|
|
"V_SUBB_U32 %4, %13, %21;"
|
|
"V_SUBB_U32 %5, %14, %22;"
|
|
"V_SUBB_U32 %6, %15, %23;"
|
|
"V_SUBB_U32 %7, %16, %24;"
|
|
"V_SUBB_U32 %8, 0, 0;"
|
|
: "=v"(r[0]), "=v"(r[1]), "=v"(r[2]), "=v"(r[3]), "=v"(r[4]), "=v"(r[5]), "=v"(r[6]), "=v"(r[7]),
|
|
"=v"(c)
|
|
: "v"(a[0]), "v"(a[1]), "v"(a[2]), "v"(a[3]), "v"(a[4]), "v"(a[5]), "v"(a[6]), "v"(a[7]),
|
|
"v"(b[0]), "v"(b[1]), "v"(b[2]), "v"(b[3]), "v"(b[4]), "v"(b[5]), "v"(b[6]), "v"(b[7])
|
|
);
|
|
#else
|
|
for (u32 i = 0; i < 8; i++)
|
|
{
|
|
const u32 diff = a[i] - b[i] - c;
|
|
|
|
if (diff != a[i]) c = (diff > a[i]);
|
|
|
|
r[i] = diff;
|
|
}
|
|
#endif
|
|
|
|
return c;
|
|
}
|
|
|
|
DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8])
|
|
{
|
|
u32 c = 0; // carry/borrow
|
|
|
|
#if defined IS_NV && HAS_ADD == 1 && HAS_ADDC == 1
|
|
asm volatile
|
|
(
|
|
"add.cc.u32 %0, %9, %17;"
|
|
"addc.cc.u32 %1, %10, %18;"
|
|
"addc.cc.u32 %2, %11, %19;"
|
|
"addc.cc.u32 %3, %12, %20;"
|
|
"addc.cc.u32 %4, %13, %21;"
|
|
"addc.cc.u32 %5, %14, %22;"
|
|
"addc.cc.u32 %6, %15, %23;"
|
|
"addc.cc.u32 %7, %16, %24;"
|
|
"addc.u32 %8, 0, 0;"
|
|
: "=r"(r[0]), "=r"(r[1]), "=r"(r[2]), "=r"(r[3]), "=r"(r[4]), "=r"(r[5]), "=r"(r[6]), "=r"(r[7]),
|
|
"=r"(c)
|
|
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
|
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
|
);
|
|
#elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1
|
|
__asm__ __volatile__
|
|
(
|
|
"V_ADD_U32 %0, %9, %17;"
|
|
"V_ADDC_U32 %1, %10, %18;"
|
|
"V_ADDC_U32 %2, %11, %19;"
|
|
"V_ADDC_U32 %3, %12, %20;"
|
|
"V_ADDC_U32 %4, %13, %21;"
|
|
"V_ADDC_U32 %5, %14, %22;"
|
|
"V_ADDC_U32 %6, %15, %23;"
|
|
"V_ADDC_U32 %7, %16, %24;"
|
|
"V_ADDC_U32 %8, 0, 0;"
|
|
: "=v"(r[0]), "=v"(r[1]), "=v"(r[2]), "=v"(r[3]), "=v"(r[4]), "=v"(r[5]), "=v"(r[6]), "=v"(r[7]),
|
|
"=v"(c)
|
|
: "v"(a[0]), "v"(a[1]), "v"(a[2]), "v"(a[3]), "v"(a[4]), "v"(a[5]), "v"(a[6]), "v"(a[7]),
|
|
"v"(b[0]), "v"(b[1]), "v"(b[2]), "v"(b[3]), "v"(b[4]), "v"(b[5]), "v"(b[6]), "v"(b[7])
|
|
);
|
|
#else
|
|
for (u32 i = 0; i < 8; i++)
|
|
{
|
|
const u32 t = a[i] + b[i] + c;
|
|
|
|
if (t != a[i]) c = (t < a[i]);
|
|
|
|
r[i] = t;
|
|
}
|
|
#endif
|
|
|
|
return c;
|
|
}
|
|
|
|
DECLSPEC void sub_mod (u32 r[8], const u32 a[8], const u32 b[8])
|
|
{
|
|
const u32 c = sub (r, a, b); // carry
|
|
|
|
if (c)
|
|
{
|
|
u32 t[8];
|
|
|
|
t[0] = SECP256K1_P0;
|
|
t[1] = SECP256K1_P1;
|
|
t[2] = SECP256K1_P2;
|
|
t[3] = SECP256K1_P3;
|
|
t[4] = SECP256K1_P4;
|
|
t[5] = SECP256K1_P5;
|
|
t[6] = SECP256K1_P6;
|
|
t[7] = SECP256K1_P7;
|
|
|
|
add (r, r, t);
|
|
}
|
|
}
|
|
|
|
DECLSPEC void add_mod (u32 r[8], const u32 a[8], const u32 b[8])
|
|
{
|
|
const u32 c = add (r, a, b); // carry
|
|
|
|
/*
|
|
* Modulo operation:
|
|
*/
|
|
|
|
// note: we could have an early exit in case of c == 1 => sub ()
|
|
|
|
u32 t[8];
|
|
|
|
t[0] = SECP256K1_P0;
|
|
t[1] = SECP256K1_P1;
|
|
t[2] = SECP256K1_P2;
|
|
t[3] = SECP256K1_P3;
|
|
t[4] = SECP256K1_P4;
|
|
t[5] = SECP256K1_P5;
|
|
t[6] = SECP256K1_P6;
|
|
t[7] = SECP256K1_P7;
|
|
|
|
// check if modulo operation is needed
|
|
|
|
u32 mod = 1;
|
|
|
|
if (c == 0)
|
|
{
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (r[i] < t[i])
|
|
{
|
|
mod = 0;
|
|
|
|
break; // or return ! (check if faster)
|
|
}
|
|
|
|
if (r[i] > t[i]) break;
|
|
}
|
|
}
|
|
|
|
if (mod == 1)
|
|
{
|
|
sub (r, r, t);
|
|
}
|
|
}
|
|
|
|
DECLSPEC void mod_512 (u32 n[16])
|
|
{
|
|
// we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo):
|
|
// the modulus is the secp256k1 group order
|
|
|
|
// ATTENTION: for this function the byte-order is reversed (most significant bytes
|
|
// at the left)
|
|
|
|
/*
|
|
the general modulo by shift and substract code (a = a % b):
|
|
|
|
x = b;
|
|
|
|
t = a >> 1;
|
|
|
|
while (x <= t) x <<= 1;
|
|
|
|
while (a >= b)
|
|
{
|
|
if (a >= x) a -= x;
|
|
|
|
x >>= 1;
|
|
}
|
|
|
|
return a; // remainder
|
|
*/
|
|
|
|
u32 a[16];
|
|
|
|
a[ 0] = n[ 0];
|
|
a[ 1] = n[ 1];
|
|
a[ 2] = n[ 2];
|
|
a[ 3] = n[ 3];
|
|
a[ 4] = n[ 4];
|
|
a[ 5] = n[ 5];
|
|
a[ 6] = n[ 6];
|
|
a[ 7] = n[ 7];
|
|
a[ 8] = n[ 8];
|
|
a[ 9] = n[ 9];
|
|
a[10] = n[10];
|
|
a[11] = n[11];
|
|
a[12] = n[12];
|
|
a[13] = n[13];
|
|
a[14] = n[14];
|
|
a[15] = n[15];
|
|
|
|
u32 b[16];
|
|
|
|
b[ 0] = 0x00000000;
|
|
b[ 1] = 0x00000000;
|
|
b[ 2] = 0x00000000;
|
|
b[ 3] = 0x00000000;
|
|
b[ 4] = 0x00000000;
|
|
b[ 5] = 0x00000000;
|
|
b[ 6] = 0x00000000;
|
|
b[ 7] = 0x00000000;
|
|
b[ 8] = SECP256K1_N7;
|
|
b[ 9] = SECP256K1_N6;
|
|
b[10] = SECP256K1_N5;
|
|
b[11] = SECP256K1_N4;
|
|
b[12] = SECP256K1_N3;
|
|
b[13] = SECP256K1_N2;
|
|
b[14] = SECP256K1_N1;
|
|
b[15] = SECP256K1_N0;
|
|
|
|
/*
|
|
* Start:
|
|
*/
|
|
|
|
// x = b (but with a fast "shift" trick to avoid the while loop)
|
|
|
|
u32 x[16];
|
|
|
|
x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the
|
|
x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1
|
|
x[ 2] = b[10];
|
|
x[ 3] = b[11];
|
|
x[ 4] = b[12];
|
|
x[ 5] = b[13];
|
|
x[ 6] = b[14];
|
|
x[ 7] = b[15];
|
|
x[ 8] = 0x00000000;
|
|
x[ 9] = 0x00000000;
|
|
x[10] = 0x00000000;
|
|
x[11] = 0x00000000;
|
|
x[12] = 0x00000000;
|
|
x[13] = 0x00000000;
|
|
x[14] = 0x00000000;
|
|
x[15] = 0x00000000;
|
|
|
|
// a >= b
|
|
|
|
while (a[0] >= b[0])
|
|
{
|
|
u32 l00 = a[ 0] < b[ 0];
|
|
u32 l01 = a[ 1] < b[ 1];
|
|
u32 l02 = a[ 2] < b[ 2];
|
|
u32 l03 = a[ 3] < b[ 3];
|
|
u32 l04 = a[ 4] < b[ 4];
|
|
u32 l05 = a[ 5] < b[ 5];
|
|
u32 l06 = a[ 6] < b[ 6];
|
|
u32 l07 = a[ 7] < b[ 7];
|
|
u32 l08 = a[ 8] < b[ 8];
|
|
u32 l09 = a[ 9] < b[ 9];
|
|
u32 l10 = a[10] < b[10];
|
|
u32 l11 = a[11] < b[11];
|
|
u32 l12 = a[12] < b[12];
|
|
u32 l13 = a[13] < b[13];
|
|
u32 l14 = a[14] < b[14];
|
|
u32 l15 = a[15] < b[15];
|
|
|
|
u32 e00 = a[ 0] == b[ 0];
|
|
u32 e01 = a[ 1] == b[ 1];
|
|
u32 e02 = a[ 2] == b[ 2];
|
|
u32 e03 = a[ 3] == b[ 3];
|
|
u32 e04 = a[ 4] == b[ 4];
|
|
u32 e05 = a[ 5] == b[ 5];
|
|
u32 e06 = a[ 6] == b[ 6];
|
|
u32 e07 = a[ 7] == b[ 7];
|
|
u32 e08 = a[ 8] == b[ 8];
|
|
u32 e09 = a[ 9] == b[ 9];
|
|
u32 e10 = a[10] == b[10];
|
|
u32 e11 = a[11] == b[11];
|
|
u32 e12 = a[12] == b[12];
|
|
u32 e13 = a[13] == b[13];
|
|
u32 e14 = a[14] == b[14];
|
|
|
|
if (l00) break;
|
|
if (l01 && e00) break;
|
|
if (l02 && e00 && e01) break;
|
|
if (l03 && e00 && e01 && e02) break;
|
|
if (l04 && e00 && e01 && e02 && e03) break;
|
|
if (l05 && e00 && e01 && e02 && e03 && e04) break;
|
|
if (l06 && e00 && e01 && e02 && e03 && e04 && e05) break;
|
|
if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) break;
|
|
if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) break;
|
|
if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) break;
|
|
if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) break;
|
|
if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) break;
|
|
if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) break;
|
|
if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) break;
|
|
if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) break;
|
|
if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) break;
|
|
|
|
// r = x (copy it to have the original values for the subtraction)
|
|
|
|
u32 r[16];
|
|
|
|
r[ 0] = x[ 0];
|
|
r[ 1] = x[ 1];
|
|
r[ 2] = x[ 2];
|
|
r[ 3] = x[ 3];
|
|
r[ 4] = x[ 4];
|
|
r[ 5] = x[ 5];
|
|
r[ 6] = x[ 6];
|
|
r[ 7] = x[ 7];
|
|
r[ 8] = x[ 8];
|
|
r[ 9] = x[ 9];
|
|
r[10] = x[10];
|
|
r[11] = x[11];
|
|
r[12] = x[12];
|
|
r[13] = x[13];
|
|
r[14] = x[14];
|
|
r[15] = x[15];
|
|
|
|
// x <<= 1
|
|
|
|
x[15] = x[15] >> 1 | x[14] << 31;
|
|
x[14] = x[14] >> 1 | x[13] << 31;
|
|
x[13] = x[13] >> 1 | x[12] << 31;
|
|
x[12] = x[12] >> 1 | x[11] << 31;
|
|
x[11] = x[11] >> 1 | x[10] << 31;
|
|
x[10] = x[10] >> 1 | x[ 9] << 31;
|
|
x[ 9] = x[ 9] >> 1 | x[ 8] << 31;
|
|
x[ 8] = x[ 8] >> 1 | x[ 7] << 31;
|
|
x[ 7] = x[ 7] >> 1 | x[ 6] << 31;
|
|
x[ 6] = x[ 6] >> 1 | x[ 5] << 31;
|
|
x[ 5] = x[ 5] >> 1 | x[ 4] << 31;
|
|
x[ 4] = x[ 4] >> 1 | x[ 3] << 31;
|
|
x[ 3] = x[ 3] >> 1 | x[ 2] << 31;
|
|
x[ 2] = x[ 2] >> 1 | x[ 1] << 31;
|
|
x[ 1] = x[ 1] >> 1 | x[ 0] << 31;
|
|
x[ 0] = x[ 0] >> 1;
|
|
|
|
// if (a >= r) a -= r;
|
|
|
|
l00 = a[ 0] < r[ 0];
|
|
l01 = a[ 1] < r[ 1];
|
|
l02 = a[ 2] < r[ 2];
|
|
l03 = a[ 3] < r[ 3];
|
|
l04 = a[ 4] < r[ 4];
|
|
l05 = a[ 5] < r[ 5];
|
|
l06 = a[ 6] < r[ 6];
|
|
l07 = a[ 7] < r[ 7];
|
|
l08 = a[ 8] < r[ 8];
|
|
l09 = a[ 9] < r[ 9];
|
|
l10 = a[10] < r[10];
|
|
l11 = a[11] < r[11];
|
|
l12 = a[12] < r[12];
|
|
l13 = a[13] < r[13];
|
|
l14 = a[14] < r[14];
|
|
l15 = a[15] < r[15];
|
|
|
|
e00 = a[ 0] == r[ 0];
|
|
e01 = a[ 1] == r[ 1];
|
|
e02 = a[ 2] == r[ 2];
|
|
e03 = a[ 3] == r[ 3];
|
|
e04 = a[ 4] == r[ 4];
|
|
e05 = a[ 5] == r[ 5];
|
|
e06 = a[ 6] == r[ 6];
|
|
e07 = a[ 7] == r[ 7];
|
|
e08 = a[ 8] == r[ 8];
|
|
e09 = a[ 9] == r[ 9];
|
|
e10 = a[10] == r[10];
|
|
e11 = a[11] == r[11];
|
|
e12 = a[12] == r[12];
|
|
e13 = a[13] == r[13];
|
|
e14 = a[14] == r[14];
|
|
|
|
if (l00) continue;
|
|
if (l01 && e00) continue;
|
|
if (l02 && e00 && e01) continue;
|
|
if (l03 && e00 && e01 && e02) continue;
|
|
if (l04 && e00 && e01 && e02 && e03) continue;
|
|
if (l05 && e00 && e01 && e02 && e03 && e04) continue;
|
|
if (l06 && e00 && e01 && e02 && e03 && e04 && e05) continue;
|
|
if (l07 && e00 && e01 && e02 && e03 && e04 && e05 && e06) continue;
|
|
if (l08 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07) continue;
|
|
if (l09 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08) continue;
|
|
if (l10 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09) continue;
|
|
if (l11 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10) continue;
|
|
if (l12 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11) continue;
|
|
if (l13 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12) continue;
|
|
if (l14 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13) continue;
|
|
if (l15 && e00 && e01 && e02 && e03 && e04 && e05 && e06 && e07 && e08 && e09 && e10 && e11 && e12 && e13 && e14) continue;
|
|
|
|
// substract (a -= r):
|
|
|
|
r[ 0] = a[ 0] - r[ 0];
|
|
r[ 1] = a[ 1] - r[ 1];
|
|
r[ 2] = a[ 2] - r[ 2];
|
|
r[ 3] = a[ 3] - r[ 3];
|
|
r[ 4] = a[ 4] - r[ 4];
|
|
r[ 5] = a[ 5] - r[ 5];
|
|
r[ 6] = a[ 6] - r[ 6];
|
|
r[ 7] = a[ 7] - r[ 7];
|
|
r[ 8] = a[ 8] - r[ 8];
|
|
r[ 9] = a[ 9] - r[ 9];
|
|
r[10] = a[10] - r[10];
|
|
r[11] = a[11] - r[11];
|
|
r[12] = a[12] - r[12];
|
|
r[13] = a[13] - r[13];
|
|
r[14] = a[14] - r[14];
|
|
r[15] = a[15] - r[15];
|
|
|
|
// take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!)
|
|
|
|
if (r[ 1] > a[ 1]) r[ 0]--;
|
|
if (r[ 2] > a[ 2]) r[ 1]--;
|
|
if (r[ 3] > a[ 3]) r[ 2]--;
|
|
if (r[ 4] > a[ 4]) r[ 3]--;
|
|
if (r[ 5] > a[ 5]) r[ 4]--;
|
|
if (r[ 6] > a[ 6]) r[ 5]--;
|
|
if (r[ 7] > a[ 7]) r[ 6]--;
|
|
if (r[ 8] > a[ 8]) r[ 7]--;
|
|
if (r[ 9] > a[ 9]) r[ 8]--;
|
|
if (r[10] > a[10]) r[ 9]--;
|
|
if (r[11] > a[11]) r[10]--;
|
|
if (r[12] > a[12]) r[11]--;
|
|
if (r[13] > a[13]) r[12]--;
|
|
if (r[14] > a[14]) r[13]--;
|
|
if (r[15] > a[15]) r[14]--;
|
|
|
|
a[ 0] = r[ 0];
|
|
a[ 1] = r[ 1];
|
|
a[ 2] = r[ 2];
|
|
a[ 3] = r[ 3];
|
|
a[ 4] = r[ 4];
|
|
a[ 5] = r[ 5];
|
|
a[ 6] = r[ 6];
|
|
a[ 7] = r[ 7];
|
|
a[ 8] = r[ 8];
|
|
a[ 9] = r[ 9];
|
|
a[10] = r[10];
|
|
a[11] = r[11];
|
|
a[12] = r[12];
|
|
a[13] = r[13];
|
|
a[14] = r[14];
|
|
a[15] = r[15];
|
|
}
|
|
|
|
n[ 0] = a[ 0];
|
|
n[ 1] = a[ 1];
|
|
n[ 2] = a[ 2];
|
|
n[ 3] = a[ 3];
|
|
n[ 4] = a[ 4];
|
|
n[ 5] = a[ 5];
|
|
n[ 6] = a[ 6];
|
|
n[ 7] = a[ 7];
|
|
n[ 8] = a[ 8];
|
|
n[ 9] = a[ 9];
|
|
n[10] = a[10];
|
|
n[11] = a[11];
|
|
n[12] = a[12];
|
|
n[13] = a[13];
|
|
n[14] = a[14];
|
|
n[15] = a[15];
|
|
}
|
|
|
|
DECLSPEC void mul_mod (u32 r[8], const u32 a[8], const u32 b[8]) // TODO get rid of u64 ?
|
|
{
|
|
u32 t[16] = { 0 }; // we need up to double the space (2 * 8)
|
|
|
|
/*
|
|
* First start with the basic a * b multiplication:
|
|
*/
|
|
|
|
u32 t0 = 0;
|
|
u32 t1 = 0;
|
|
u32 c = 0;
|
|
|
|
for (u32 i = 0; i < 8; i++)
|
|
{
|
|
for (u32 j = 0; j <= i; j++)
|
|
{
|
|
u64 p = ((u64) a[j]) * b[i - j];
|
|
|
|
u64 d = ((u64) t1) << 32 | t0;
|
|
|
|
d += p;
|
|
|
|
t0 = (u32) d;
|
|
t1 = d >> 32;
|
|
|
|
c += d < p; // carry
|
|
}
|
|
|
|
t[i] = t0;
|
|
|
|
t0 = t1;
|
|
t1 = c;
|
|
|
|
c = 0;
|
|
}
|
|
|
|
for (u32 i = 8; i < 15; i++)
|
|
{
|
|
for (u32 j = i - 7; j < 8; j++)
|
|
{
|
|
u64 p = ((u64) a[j]) * b[i - j];
|
|
|
|
u64 d = ((u64) t1) << 32 | t0;
|
|
|
|
d += p;
|
|
|
|
t0 = (u32) d;
|
|
t1 = d >> 32;
|
|
|
|
c += d < p;
|
|
}
|
|
|
|
t[i] = t0;
|
|
|
|
t0 = t1;
|
|
t1 = c;
|
|
|
|
c = 0;
|
|
}
|
|
|
|
t[15] = t0;
|
|
|
|
|
|
|
|
/*
|
|
* Now do the modulo operation:
|
|
* (r = t % p)
|
|
*
|
|
* http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf (p.354 or p.9 in that document)
|
|
*/
|
|
|
|
u32 tmp[16] = { 0 };
|
|
|
|
// c = 0;
|
|
|
|
// Note: SECP256K1_P = 2^256 - 2^32 - 977 (0x03d1 = 977)
|
|
// multiply t[8]...t[15] by omega:
|
|
|
|
for (u32 i = 0, j = 8; i < 8; i++, j++)
|
|
{
|
|
u64 p = ((u64) 0x03d1) * t[j] + c;
|
|
|
|
tmp[i] = (u32) p;
|
|
|
|
c = p >> 32;
|
|
}
|
|
|
|
tmp[8] = c;
|
|
|
|
c = add (tmp + 1, tmp + 1, t + 8); // modifies tmp[1]...tmp[8]
|
|
|
|
tmp[9] = c;
|
|
|
|
|
|
// r = t + tmp
|
|
|
|
c = add (r, t, tmp);
|
|
|
|
// multiply t[0]...t[7] by omega:
|
|
|
|
u32 c2 = 0;
|
|
|
|
// memset (t, 0, sizeof (t));
|
|
|
|
for (u32 i = 0, j = 8; i < 8; i++, j++)
|
|
{
|
|
u64 p = ((u64) 0x3d1) * tmp[j] + c2;
|
|
|
|
t[i] = (u32) p;
|
|
|
|
c2 = p >> 32;
|
|
}
|
|
|
|
t[8] = c2;
|
|
|
|
c2 = add (t + 1, t + 1, tmp + 8); // modifies t[1]...t[8]
|
|
|
|
t[9] = c2;
|
|
|
|
|
|
// r = r + t
|
|
|
|
c2 = add (r, r, t);
|
|
|
|
c += c2;
|
|
|
|
t[0] = SECP256K1_P0;
|
|
t[1] = SECP256K1_P1;
|
|
t[2] = SECP256K1_P2;
|
|
t[3] = SECP256K1_P3;
|
|
t[4] = SECP256K1_P4;
|
|
t[5] = SECP256K1_P5;
|
|
t[6] = SECP256K1_P6;
|
|
t[7] = SECP256K1_P7;
|
|
|
|
for (u32 i = c; i > 0; i--)
|
|
{
|
|
sub (r, r, t);
|
|
}
|
|
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (r[i] < t[i]) break;
|
|
|
|
if (r[i] > t[i])
|
|
{
|
|
sub (r, r, t);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
DECLSPEC void sqrt_mod (u32 r[8])
|
|
{
|
|
// Fermat's Little Theorem
|
|
// secp256k1: y^2 = x^3 + 7 % p
|
|
// y ^ (p - 1) = 1
|
|
// y ^ (p - 1) = (y^2) ^ ((p - 1) / 2) = 1 => y^2 = (y^2) ^ (((p - 1) / 2) + 1)
|
|
// => y = (y^2) ^ ((((p - 1) / 2) + 1) / 2)
|
|
// y = (y^2) ^ (((p - 1 + 2) / 2) / 2) = (y^2) ^ ((p + 1) / 4)
|
|
|
|
// y1 = (x^3 + 7) ^ ((p + 1) / 4)
|
|
// y2 = p - y1 (or y2 = y1 * -1 % p)
|
|
|
|
u32 s[8];
|
|
|
|
s[0] = SECP256K1_P0 + 1; // because of (p + 1) / 4 or use add (s, s, 1)
|
|
s[1] = SECP256K1_P1;
|
|
s[2] = SECP256K1_P2;
|
|
s[3] = SECP256K1_P3;
|
|
s[4] = SECP256K1_P4;
|
|
s[5] = SECP256K1_P5;
|
|
s[6] = SECP256K1_P6;
|
|
s[7] = SECP256K1_P7;
|
|
|
|
u32 t[8] = { 0 };
|
|
|
|
t[0] = 1;
|
|
|
|
for (u32 i = 255; i > 1; i--) // we just skip the last 2 multiplications (=> exp / 4)
|
|
{
|
|
mul_mod (t, t, t); // r * r
|
|
|
|
u32 idx = i >> 5;
|
|
u32 mask = 1 << (i & 0x1f);
|
|
|
|
if (s[idx] & mask)
|
|
{
|
|
mul_mod (t, t, r); // t * r
|
|
}
|
|
}
|
|
|
|
r[0] = t[0];
|
|
r[1] = t[1];
|
|
r[2] = t[2];
|
|
r[3] = t[3];
|
|
r[4] = t[4];
|
|
r[5] = t[5];
|
|
r[6] = t[6];
|
|
r[7] = t[7];
|
|
}
|
|
|
|
// (inverse (a, p) * a) % p == 1 (or think of a * a^-1 = a / a = 1)
|
|
|
|
DECLSPEC void inv_mod (u32 a[8])
|
|
{
|
|
// How often does this really happen? it should "almost" never happen (but would be safer)
|
|
// if ((a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0) return;
|
|
|
|
u32 t0[8];
|
|
|
|
t0[0] = a[0];
|
|
t0[1] = a[1];
|
|
t0[2] = a[2];
|
|
t0[3] = a[3];
|
|
t0[4] = a[4];
|
|
t0[5] = a[5];
|
|
t0[6] = a[6];
|
|
t0[7] = a[7];
|
|
|
|
u32 p[8];
|
|
|
|
p[0] = SECP256K1_P0;
|
|
p[1] = SECP256K1_P1;
|
|
p[2] = SECP256K1_P2;
|
|
p[3] = SECP256K1_P3;
|
|
p[4] = SECP256K1_P4;
|
|
p[5] = SECP256K1_P5;
|
|
p[6] = SECP256K1_P6;
|
|
p[7] = SECP256K1_P7;
|
|
|
|
u32 t1[8];
|
|
|
|
t1[0] = SECP256K1_P0;
|
|
t1[1] = SECP256K1_P1;
|
|
t1[2] = SECP256K1_P2;
|
|
t1[3] = SECP256K1_P3;
|
|
t1[4] = SECP256K1_P4;
|
|
t1[5] = SECP256K1_P5;
|
|
t1[6] = SECP256K1_P6;
|
|
t1[7] = SECP256K1_P7;
|
|
|
|
u32 t2[8] = { 0 };
|
|
|
|
t2[0] = 0x00000001;
|
|
|
|
u32 t3[8] = { 0 };
|
|
|
|
u32 b = (t0[0] != t1[0])
|
|
| (t0[1] != t1[1])
|
|
| (t0[2] != t1[2])
|
|
| (t0[3] != t1[3])
|
|
| (t0[4] != t1[4])
|
|
| (t0[5] != t1[5])
|
|
| (t0[6] != t1[6])
|
|
| (t0[7] != t1[7]);
|
|
|
|
while (b)
|
|
{
|
|
if ((t0[0] & 1) == 0) // even
|
|
{
|
|
t0[0] = t0[0] >> 1 | t0[1] << 31;
|
|
t0[1] = t0[1] >> 1 | t0[2] << 31;
|
|
t0[2] = t0[2] >> 1 | t0[3] << 31;
|
|
t0[3] = t0[3] >> 1 | t0[4] << 31;
|
|
t0[4] = t0[4] >> 1 | t0[5] << 31;
|
|
t0[5] = t0[5] >> 1 | t0[6] << 31;
|
|
t0[6] = t0[6] >> 1 | t0[7] << 31;
|
|
t0[7] = t0[7] >> 1;
|
|
|
|
u32 c = 0;
|
|
|
|
if (t2[0] & 1) c = add (t2, t2, p);
|
|
|
|
t2[0] = t2[0] >> 1 | t2[1] << 31;
|
|
t2[1] = t2[1] >> 1 | t2[2] << 31;
|
|
t2[2] = t2[2] >> 1 | t2[3] << 31;
|
|
t2[3] = t2[3] >> 1 | t2[4] << 31;
|
|
t2[4] = t2[4] >> 1 | t2[5] << 31;
|
|
t2[5] = t2[5] >> 1 | t2[6] << 31;
|
|
t2[6] = t2[6] >> 1 | t2[7] << 31;
|
|
t2[7] = t2[7] >> 1 | c << 31;
|
|
}
|
|
else if ((t1[0] & 1) == 0)
|
|
{
|
|
t1[0] = t1[0] >> 1 | t1[1] << 31;
|
|
t1[1] = t1[1] >> 1 | t1[2] << 31;
|
|
t1[2] = t1[2] >> 1 | t1[3] << 31;
|
|
t1[3] = t1[3] >> 1 | t1[4] << 31;
|
|
t1[4] = t1[4] >> 1 | t1[5] << 31;
|
|
t1[5] = t1[5] >> 1 | t1[6] << 31;
|
|
t1[6] = t1[6] >> 1 | t1[7] << 31;
|
|
t1[7] = t1[7] >> 1;
|
|
|
|
u32 c = 0;
|
|
|
|
if (t3[0] & 1) c = add (t3, t3, p);
|
|
|
|
t3[0] = t3[0] >> 1 | t3[1] << 31;
|
|
t3[1] = t3[1] >> 1 | t3[2] << 31;
|
|
t3[2] = t3[2] >> 1 | t3[3] << 31;
|
|
t3[3] = t3[3] >> 1 | t3[4] << 31;
|
|
t3[4] = t3[4] >> 1 | t3[5] << 31;
|
|
t3[5] = t3[5] >> 1 | t3[6] << 31;
|
|
t3[6] = t3[6] >> 1 | t3[7] << 31;
|
|
t3[7] = t3[7] >> 1 | c << 31;
|
|
}
|
|
else
|
|
{
|
|
u32 gt = 0;
|
|
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (t0[i] > t1[i])
|
|
{
|
|
gt = 1;
|
|
|
|
break;
|
|
}
|
|
|
|
if (t0[i] < t1[i]) break;
|
|
}
|
|
|
|
if (gt)
|
|
{
|
|
sub (t0, t0, t1);
|
|
|
|
t0[0] = t0[0] >> 1 | t0[1] << 31;
|
|
t0[1] = t0[1] >> 1 | t0[2] << 31;
|
|
t0[2] = t0[2] >> 1 | t0[3] << 31;
|
|
t0[3] = t0[3] >> 1 | t0[4] << 31;
|
|
t0[4] = t0[4] >> 1 | t0[5] << 31;
|
|
t0[5] = t0[5] >> 1 | t0[6] << 31;
|
|
t0[6] = t0[6] >> 1 | t0[7] << 31;
|
|
t0[7] = t0[7] >> 1;
|
|
|
|
u32 lt = 0;
|
|
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (t2[i] < t3[i])
|
|
{
|
|
lt = 1;
|
|
|
|
break;
|
|
}
|
|
|
|
if (t2[i] > t3[i]) break;
|
|
}
|
|
|
|
if (lt) add (t2, t2, p);
|
|
|
|
sub (t2, t2, t3);
|
|
|
|
u32 c = 0;
|
|
|
|
if (t2[0] & 1) c = add (t2, t2, p);
|
|
|
|
t2[0] = t2[0] >> 1 | t2[1] << 31;
|
|
t2[1] = t2[1] >> 1 | t2[2] << 31;
|
|
t2[2] = t2[2] >> 1 | t2[3] << 31;
|
|
t2[3] = t2[3] >> 1 | t2[4] << 31;
|
|
t2[4] = t2[4] >> 1 | t2[5] << 31;
|
|
t2[5] = t2[5] >> 1 | t2[6] << 31;
|
|
t2[6] = t2[6] >> 1 | t2[7] << 31;
|
|
t2[7] = t2[7] >> 1 | c << 31;
|
|
}
|
|
else
|
|
{
|
|
sub (t1, t1, t0);
|
|
|
|
t1[0] = t1[0] >> 1 | t1[1] << 31;
|
|
t1[1] = t1[1] >> 1 | t1[2] << 31;
|
|
t1[2] = t1[2] >> 1 | t1[3] << 31;
|
|
t1[3] = t1[3] >> 1 | t1[4] << 31;
|
|
t1[4] = t1[4] >> 1 | t1[5] << 31;
|
|
t1[5] = t1[5] >> 1 | t1[6] << 31;
|
|
t1[6] = t1[6] >> 1 | t1[7] << 31;
|
|
t1[7] = t1[7] >> 1;
|
|
|
|
u32 lt = 0;
|
|
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (t3[i] < t2[i])
|
|
{
|
|
lt = 1;
|
|
|
|
break;
|
|
}
|
|
|
|
if (t3[i] > t2[i]) break;
|
|
}
|
|
|
|
if (lt) add (t3, t3, p);
|
|
|
|
sub (t3, t3, t2);
|
|
|
|
u32 c = 0;
|
|
|
|
if (t3[0] & 1) c = add (t3, t3, p);
|
|
|
|
t3[0] = t3[0] >> 1 | t3[1] << 31;
|
|
t3[1] = t3[1] >> 1 | t3[2] << 31;
|
|
t3[2] = t3[2] >> 1 | t3[3] << 31;
|
|
t3[3] = t3[3] >> 1 | t3[4] << 31;
|
|
t3[4] = t3[4] >> 1 | t3[5] << 31;
|
|
t3[5] = t3[5] >> 1 | t3[6] << 31;
|
|
t3[6] = t3[6] >> 1 | t3[7] << 31;
|
|
t3[7] = t3[7] >> 1 | c << 31;
|
|
}
|
|
}
|
|
|
|
// update b:
|
|
|
|
b = (t0[0] != t1[0])
|
|
| (t0[1] != t1[1])
|
|
| (t0[2] != t1[2])
|
|
| (t0[3] != t1[3])
|
|
| (t0[4] != t1[4])
|
|
| (t0[5] != t1[5])
|
|
| (t0[6] != t1[6])
|
|
| (t0[7] != t1[7]);
|
|
}
|
|
|
|
// set result:
|
|
|
|
a[0] = t2[0];
|
|
a[1] = t2[1];
|
|
a[2] = t2[2];
|
|
a[3] = t2[3];
|
|
a[4] = t2[4];
|
|
a[5] = t2[5];
|
|
a[6] = t2[6];
|
|
a[7] = t2[7];
|
|
}
|
|
|
|
/*
|
|
// everything from the formulas below of course MOD the prime:
|
|
|
|
// we use this formula:
|
|
|
|
X = (3/2 * x^2)^2 - 2 * x * y^2
|
|
Y = (3/2 * x^2) * (x * y^2 - X) - y^4
|
|
Z = y * z
|
|
|
|
this is identical to the more frequently used form:
|
|
|
|
X = (3 * x^2)^2 - 8 * x * y^2
|
|
Y = 3 * x^2 * (4 * x * y^2 - X) - 8 * y^4
|
|
Z = 2 * y * z
|
|
*/
|
|
|
|
DECLSPEC void point_double (u32 x[8], u32 y[8], u32 z[8])
|
|
{
|
|
// How often does this really happen? it should "almost" never happen (but would be safer)
|
|
|
|
/*
|
|
if ((y[0] | y[1] | y[2] | y[3] | y[4] | y[5] | y[6] | y[7]) == 0)
|
|
{
|
|
x[0] = 0;
|
|
x[1] = 0;
|
|
x[2] = 0;
|
|
x[3] = 0;
|
|
x[4] = 0;
|
|
x[5] = 0;
|
|
x[6] = 0;
|
|
x[7] = 0;
|
|
|
|
y[0] = 0;
|
|
y[1] = 0;
|
|
y[2] = 0;
|
|
y[3] = 0;
|
|
y[4] = 0;
|
|
y[5] = 0;
|
|
y[6] = 0;
|
|
y[7] = 0;
|
|
|
|
z[0] = 0;
|
|
z[1] = 0;
|
|
z[2] = 0;
|
|
z[3] = 0;
|
|
z[4] = 0;
|
|
z[5] = 0;
|
|
z[6] = 0;
|
|
z[7] = 0;
|
|
|
|
return;
|
|
}
|
|
*/
|
|
|
|
u32 t1[8];
|
|
|
|
t1[0] = x[0];
|
|
t1[1] = x[1];
|
|
t1[2] = x[2];
|
|
t1[3] = x[3];
|
|
t1[4] = x[4];
|
|
t1[5] = x[5];
|
|
t1[6] = x[6];
|
|
t1[7] = x[7];
|
|
|
|
u32 t2[8];
|
|
|
|
t2[0] = y[0];
|
|
t2[1] = y[1];
|
|
t2[2] = y[2];
|
|
t2[3] = y[3];
|
|
t2[4] = y[4];
|
|
t2[5] = y[5];
|
|
t2[6] = y[6];
|
|
t2[7] = y[7];
|
|
|
|
u32 t3[8];
|
|
|
|
t3[0] = z[0];
|
|
t3[1] = z[1];
|
|
t3[2] = z[2];
|
|
t3[3] = z[3];
|
|
t3[4] = z[4];
|
|
t3[5] = z[5];
|
|
t3[6] = z[6];
|
|
t3[7] = z[7];
|
|
|
|
u32 t4[8];
|
|
u32 t5[8];
|
|
u32 t6[8];
|
|
|
|
mul_mod (t4, t1, t1); // t4 = x^2
|
|
|
|
mul_mod (t5, t2, t2); // t5 = y^2
|
|
|
|
mul_mod (t1, t1, t5); // t1 = x*y^2
|
|
|
|
mul_mod (t5, t5, t5); // t5 = t5^2 = y^4
|
|
|
|
// here the z^2 and z^4 is not needed for a = 0
|
|
|
|
mul_mod (t3, t2, t3); // t3 = x * z
|
|
|
|
add_mod (t2, t4, t4); // t2 = 2 * t4 = 2 * x^2
|
|
add_mod (t4, t4, t2); // t4 = 3 * t4 = 3 * x^2
|
|
|
|
// a * z^4 = 0 * 1^4 = 0
|
|
|
|
// don't discard the least significant bit it's important too!
|
|
|
|
u32 c = 0;
|
|
|
|
if (t4[0] & 1)
|
|
{
|
|
u32 t[8];
|
|
|
|
t[0] = SECP256K1_P0;
|
|
t[1] = SECP256K1_P1;
|
|
t[2] = SECP256K1_P2;
|
|
t[3] = SECP256K1_P3;
|
|
t[4] = SECP256K1_P4;
|
|
t[5] = SECP256K1_P5;
|
|
t[6] = SECP256K1_P6;
|
|
t[7] = SECP256K1_P7;
|
|
|
|
c = add (t4, t4, t); // t4 + SECP256K1_P
|
|
}
|
|
|
|
// right shift (t4 / 2):
|
|
|
|
t4[0] = t4[0] >> 1 | t4[1] << 31;
|
|
t4[1] = t4[1] >> 1 | t4[2] << 31;
|
|
t4[2] = t4[2] >> 1 | t4[3] << 31;
|
|
t4[3] = t4[3] >> 1 | t4[4] << 31;
|
|
t4[4] = t4[4] >> 1 | t4[5] << 31;
|
|
t4[5] = t4[5] >> 1 | t4[6] << 31;
|
|
t4[6] = t4[6] >> 1 | t4[7] << 31;
|
|
t4[7] = t4[7] >> 1 | c << 31;
|
|
|
|
mul_mod (t6, t4, t4); // t6 = t4^2 = (3/2 * x^2)^2
|
|
|
|
add_mod (t2, t1, t1); // t2 = 2 * t1
|
|
|
|
sub_mod (t6, t6, t2); // t6 = t6 - t2
|
|
sub_mod (t1, t1, t6); // t1 = t1 - t6
|
|
|
|
mul_mod (t4, t4, t1); // t4 = t4 * t1
|
|
|
|
sub_mod (t1, t4, t5); // t1 = t4 - t5
|
|
|
|
// => x = t6, y = t1, z = t3:
|
|
|
|
x[0] = t6[0];
|
|
x[1] = t6[1];
|
|
x[2] = t6[2];
|
|
x[3] = t6[3];
|
|
x[4] = t6[4];
|
|
x[5] = t6[5];
|
|
x[6] = t6[6];
|
|
x[7] = t6[7];
|
|
|
|
y[0] = t1[0];
|
|
y[1] = t1[1];
|
|
y[2] = t1[2];
|
|
y[3] = t1[3];
|
|
y[4] = t1[4];
|
|
y[5] = t1[5];
|
|
y[6] = t1[6];
|
|
y[7] = t1[7];
|
|
|
|
z[0] = t3[0];
|
|
z[1] = t3[1];
|
|
z[2] = t3[2];
|
|
z[3] = t3[3];
|
|
z[4] = t3[4];
|
|
z[5] = t3[5];
|
|
z[6] = t3[6];
|
|
z[7] = t3[7];
|
|
}
|
|
|
|
DECLSPEC void point_add (u32 x1[8], u32 y1[8], u32 z1[8], const u32 x2[8], const u32 y2[8], const u32 z2[8])
|
|
{
|
|
// How often does this really happen? it should "almost" never happen (but would be safer)
|
|
|
|
/*
|
|
if ((y2[0] | y2[1] | y2[2] | y2[3] | y2[4] | y2[5] | y2[6] | y2[7]) == 0) return;
|
|
|
|
if ((y1[0] | y1[1] | y1[2] | y1[3] | y1[4] | y1[5] | y1[6] | y1[7]) == 0)
|
|
{
|
|
x1[0] = x2[0];
|
|
x1[1] = x2[1];
|
|
x1[2] = x2[2];
|
|
x1[3] = x2[3];
|
|
x1[4] = x2[4];
|
|
x1[5] = x2[5];
|
|
x1[6] = x2[6];
|
|
x1[7] = x2[7];
|
|
|
|
y1[0] = y2[0];
|
|
y1[1] = y2[1];
|
|
y1[2] = y2[2];
|
|
y1[3] = y2[3];
|
|
y1[4] = y2[4];
|
|
y1[5] = y2[5];
|
|
y1[6] = y2[6];
|
|
y1[7] = y2[7];
|
|
|
|
z1[0] = z2[0];
|
|
z1[1] = z2[1];
|
|
z1[2] = z2[2];
|
|
z1[3] = z2[3];
|
|
z1[4] = z2[4];
|
|
z1[5] = z2[5];
|
|
z1[6] = z2[6];
|
|
z1[7] = z2[7];
|
|
|
|
return;
|
|
}
|
|
*/
|
|
|
|
// if x1 == x2 and y2 == y2 and z2 == z2 we need to double instead?
|
|
|
|
// x1/y1/z1:
|
|
|
|
u32 t1[8];
|
|
|
|
t1[0] = x1[0];
|
|
t1[1] = x1[1];
|
|
t1[2] = x1[2];
|
|
t1[3] = x1[3];
|
|
t1[4] = x1[4];
|
|
t1[5] = x1[5];
|
|
t1[6] = x1[6];
|
|
t1[7] = x1[7];
|
|
|
|
u32 t2[8];
|
|
|
|
t2[0] = y1[0];
|
|
t2[1] = y1[1];
|
|
t2[2] = y1[2];
|
|
t2[3] = y1[3];
|
|
t2[4] = y1[4];
|
|
t2[5] = y1[5];
|
|
t2[6] = y1[6];
|
|
t2[7] = y1[7];
|
|
|
|
u32 t3[8];
|
|
|
|
t3[0] = z1[0];
|
|
t3[1] = z1[1];
|
|
t3[2] = z1[2];
|
|
t3[3] = z1[3];
|
|
t3[4] = z1[4];
|
|
t3[5] = z1[5];
|
|
t3[6] = z1[6];
|
|
t3[7] = z1[7];
|
|
|
|
// x2/y2/z2:
|
|
|
|
u32 t4[8];
|
|
|
|
t4[0] = x2[0];
|
|
t4[1] = x2[1];
|
|
t4[2] = x2[2];
|
|
t4[3] = x2[3];
|
|
t4[4] = x2[4];
|
|
t4[5] = x2[5];
|
|
t4[6] = x2[6];
|
|
t4[7] = x2[7];
|
|
|
|
u32 t5[8];
|
|
|
|
t5[0] = y2[0];
|
|
t5[1] = y2[1];
|
|
t5[2] = y2[2];
|
|
t5[3] = y2[3];
|
|
t5[4] = y2[4];
|
|
t5[5] = y2[5];
|
|
t5[6] = y2[6];
|
|
t5[7] = y2[7];
|
|
|
|
u32 t6[8];
|
|
|
|
t6[0] = z2[0];
|
|
t6[1] = z2[1];
|
|
t6[2] = z2[2];
|
|
t6[3] = z2[3];
|
|
t6[4] = z2[4];
|
|
t6[5] = z2[5];
|
|
t6[6] = z2[6];
|
|
t6[7] = z2[7];
|
|
|
|
u32 t7[8];
|
|
|
|
mul_mod (t7, t3, t3); // t7 = z1^2
|
|
mul_mod (t4, t4, t7); // t4 = x2 * z1^2 = B
|
|
|
|
mul_mod (t5, t5, t3); // t5 = y2 * z1
|
|
mul_mod (t5, t5, t7); // t5 = y2 * z1^3 = D
|
|
|
|
mul_mod (t7, t6, t6); // t7 = z2^2
|
|
|
|
mul_mod (t1, t1, t7); // t1 = x1 * z2^2
|
|
|
|
mul_mod (t2, t2, t6); // t2 = y1 * z2
|
|
mul_mod (t2, t2, t7); // t2 = y1 * z2^3 = C
|
|
|
|
sub_mod (t1, t1, t4); // t1 = A - B = E
|
|
|
|
mul_mod (t3, t6, t3); // t3 = z1 * z2
|
|
mul_mod (t3, t1, t3); // t3 = z1 * z2 * E = Z3
|
|
|
|
sub_mod (t2, t2, t5); // t2 = C - D = F
|
|
|
|
mul_mod (t7, t1, t1); // t7 = E^2
|
|
mul_mod (t6, t2, t2); // t6 = F^2
|
|
|
|
mul_mod (t4, t4, t7); // t4 = B * E^2
|
|
mul_mod (t1, t7, t1); // t1 = E^3
|
|
|
|
sub_mod (t6, t6, t1); // t6 = F^2 - E^3
|
|
|
|
add_mod (t7, t4, t4); // t7 = 2 * B * E^2
|
|
|
|
sub_mod (t6, t6, t7); // t6 = F^2 - E^2 - 2 * B * E^2 = X3
|
|
sub_mod (t4, t4, t6); // t4 = B * E^2 - X3
|
|
|
|
mul_mod (t2, t2, t4); // t2 = F * (B * E^2 - X3)
|
|
mul_mod (t7, t5, t1); // t7 = D * E^3
|
|
|
|
sub_mod (t7, t2, t7); // t7 = F * (B * E^2 - X3) - D * E^3 = Y3
|
|
|
|
x1[0] = t6[0];
|
|
x1[1] = t6[1];
|
|
x1[2] = t6[2];
|
|
x1[3] = t6[3];
|
|
x1[4] = t6[4];
|
|
x1[5] = t6[5];
|
|
x1[6] = t6[6];
|
|
x1[7] = t6[7];
|
|
|
|
y1[0] = t7[0];
|
|
y1[1] = t7[1];
|
|
y1[2] = t7[2];
|
|
y1[3] = t7[3];
|
|
y1[4] = t7[4];
|
|
y1[5] = t7[5];
|
|
y1[6] = t7[6];
|
|
y1[7] = t7[7];
|
|
|
|
z1[0] = t3[0];
|
|
z1[1] = t3[1];
|
|
z1[2] = t3[2];
|
|
z1[3] = t3[3];
|
|
z1[4] = t3[4];
|
|
z1[5] = t3[5];
|
|
z1[6] = t3[6];
|
|
z1[7] = t3[7];
|
|
}
|
|
|
|
DECLSPEC void point_get_coords (secp256k1_t *r, const u32 x[8], const u32 y[8])
|
|
{
|
|
// init the values with x and y:
|
|
|
|
u32 x1[8];
|
|
|
|
x1[0] = x[0];
|
|
x1[1] = x[1];
|
|
x1[2] = x[2];
|
|
x1[3] = x[3];
|
|
x1[4] = x[4];
|
|
x1[5] = x[5];
|
|
x1[6] = x[6];
|
|
x1[7] = x[7];
|
|
|
|
u32 y1[8];
|
|
|
|
y1[0] = y[0];
|
|
y1[1] = y[1];
|
|
y1[2] = y[2];
|
|
y1[3] = y[3];
|
|
y1[4] = y[4];
|
|
y1[5] = y[5];
|
|
y1[6] = y[6];
|
|
y1[7] = y[7];
|
|
|
|
u32 t1[8];
|
|
|
|
t1[0] = y[0];
|
|
t1[1] = y[1];
|
|
t1[2] = y[2];
|
|
t1[3] = y[3];
|
|
t1[4] = y[4];
|
|
t1[5] = y[5];
|
|
t1[6] = y[6];
|
|
t1[7] = y[7];
|
|
|
|
// we use jacobian forms and the convertion with z = 1 is basically a NO-OP:
|
|
// X = X1 * z^2 = X1, Y = Y1 * z^3 = Y1
|
|
|
|
// https://eprint.iacr.org/2011/338.pdf
|
|
|
|
// initial jacobian doubling
|
|
|
|
u32 t2[8];
|
|
u32 t3[8];
|
|
u32 t4[8];
|
|
|
|
mul_mod (t2, x1, x1); // t2 = x1^2
|
|
mul_mod (t3, y1, y1); // t3 = y1^2
|
|
|
|
mul_mod (x1, x1, t3); // x1 = x1*y1^2
|
|
|
|
mul_mod (t3, t3, t3); // t3 = t3^2 = y1^4
|
|
|
|
// here the z^2 and z^4 is not needed for a = 0 (and furthermore we have z = 1)
|
|
|
|
add_mod (y1, t2, t2); // y1 = 2 * t2 = 2 * x1^2
|
|
add_mod (t2, y1, t2); // t2 = 3 * t2 = 3 * x1^2
|
|
|
|
// a * z^4 = 0 * 1^4 = 0
|
|
|
|
// don't discard the least significant bit it's important too!
|
|
|
|
u32 c = 0;
|
|
|
|
if (t2[0] & 1)
|
|
{
|
|
u32 t[8];
|
|
|
|
t[0] = SECP256K1_P0;
|
|
t[1] = SECP256K1_P1;
|
|
t[2] = SECP256K1_P2;
|
|
t[3] = SECP256K1_P3;
|
|
t[4] = SECP256K1_P4;
|
|
t[5] = SECP256K1_P5;
|
|
t[6] = SECP256K1_P6;
|
|
t[7] = SECP256K1_P7;
|
|
|
|
c = add (t2, t2, t); // t2 + SECP256K1_P
|
|
}
|
|
|
|
// right shift (t2 / 2):
|
|
|
|
t2[0] = t2[0] >> 1 | t2[1] << 31;
|
|
t2[1] = t2[1] >> 1 | t2[2] << 31;
|
|
t2[2] = t2[2] >> 1 | t2[3] << 31;
|
|
t2[3] = t2[3] >> 1 | t2[4] << 31;
|
|
t2[4] = t2[4] >> 1 | t2[5] << 31;
|
|
t2[5] = t2[5] >> 1 | t2[6] << 31;
|
|
t2[6] = t2[6] >> 1 | t2[7] << 31;
|
|
t2[7] = t2[7] >> 1 | c << 31;
|
|
|
|
mul_mod (t4, t2, t2); // t4 = t2^2 = (3/2*x1^2)^2
|
|
|
|
add_mod (y1, x1, x1); // y1 = 2 * x1_new
|
|
|
|
sub_mod (t4, t4, y1); // t4 = t4 - y1_new
|
|
sub_mod (x1, x1, t4); // x1 = x1 - t4
|
|
|
|
mul_mod (t2, t2, x1); // t2 = t2 * x1_new
|
|
|
|
sub_mod (x1, t2, t3); // x1 = t2 - t3
|
|
|
|
// => X = t4, Y = x1, Z = t1:
|
|
// (and t2, t3 can now be safely reused)
|
|
|
|
// convert to affine coordinates (to save some bytes copied around) and store it:
|
|
|
|
u32 inv[8];
|
|
|
|
inv[0] = t1[0];
|
|
inv[1] = t1[1];
|
|
inv[2] = t1[2];
|
|
inv[3] = t1[3];
|
|
inv[4] = t1[4];
|
|
inv[5] = t1[5];
|
|
inv[6] = t1[6];
|
|
inv[7] = t1[7];
|
|
|
|
inv_mod (inv);
|
|
|
|
mul_mod (t2, inv, inv); // t2 = inv^2
|
|
mul_mod (t3, inv, t2); // t3 = inv^3
|
|
|
|
// output to y1
|
|
|
|
mul_mod (t3, t3, x1);
|
|
|
|
r->xy[31] = t3[7];
|
|
r->xy[30] = t3[6];
|
|
r->xy[29] = t3[5];
|
|
r->xy[28] = t3[4];
|
|
r->xy[27] = t3[3];
|
|
r->xy[26] = t3[2];
|
|
r->xy[25] = t3[1];
|
|
r->xy[24] = t3[0];
|
|
|
|
// output to x1
|
|
|
|
mul_mod (t3, t2, t4);
|
|
|
|
r->xy[23] = t3[7];
|
|
r->xy[22] = t3[6];
|
|
r->xy[21] = t3[5];
|
|
r->xy[20] = t3[4];
|
|
r->xy[19] = t3[3];
|
|
r->xy[18] = t3[2];
|
|
r->xy[17] = t3[1];
|
|
r->xy[16] = t3[0];
|
|
|
|
// also store orginal x/y:
|
|
|
|
r->xy[15] = y[7];
|
|
r->xy[14] = y[6];
|
|
r->xy[13] = y[5];
|
|
r->xy[12] = y[4];
|
|
r->xy[11] = y[3];
|
|
r->xy[10] = y[2];
|
|
r->xy[ 9] = y[1];
|
|
r->xy[ 8] = y[0];
|
|
|
|
r->xy[ 7] = x[7];
|
|
r->xy[ 6] = x[6];
|
|
r->xy[ 5] = x[5];
|
|
r->xy[ 4] = x[4];
|
|
r->xy[ 3] = x[3];
|
|
r->xy[ 2] = x[2];
|
|
r->xy[ 1] = x[1];
|
|
r->xy[ 0] = x[0];
|
|
|
|
|
|
// do the double of the double (i.e. "triple") too, just in case we need it in the main loop:
|
|
|
|
point_double (t4, x1, t1);
|
|
|
|
// convert to affine coordinates and store it:
|
|
|
|
inv_mod (t1);
|
|
|
|
mul_mod (t2, t1, t1); // t2 = t1^2
|
|
mul_mod (t3, t1, t2); // t3 = t1^3
|
|
|
|
// output to y2
|
|
|
|
mul_mod (t3, t3, x1);
|
|
|
|
r->xy[47] = t3[7];
|
|
r->xy[46] = t3[6];
|
|
r->xy[45] = t3[5];
|
|
r->xy[44] = t3[4];
|
|
r->xy[43] = t3[3];
|
|
r->xy[42] = t3[2];
|
|
r->xy[41] = t3[1];
|
|
r->xy[40] = t3[0];
|
|
|
|
// output to x2
|
|
|
|
mul_mod (t3, t2, t4);
|
|
|
|
r->xy[39] = t3[7];
|
|
r->xy[38] = t3[6];
|
|
r->xy[37] = t3[5];
|
|
r->xy[36] = t3[4];
|
|
r->xy[35] = t3[3];
|
|
r->xy[34] = t3[2];
|
|
r->xy[33] = t3[1];
|
|
r->xy[32] = t3[0];
|
|
}
|
|
|
|
DECLSPEC void point_mul (u32 r[9], const u32 k[8], GLOBAL_AS const secp256k1_t *tmps)
|
|
{
|
|
// first check the position of the least significant bit
|
|
|
|
// the following fancy shift operation just checks the last 2 bits, finds the
|
|
// least significant bit (set to 1) and updates idx according to this table:
|
|
// last bits | idx
|
|
// 0bxxxxxx00 | 2
|
|
// 0bxxxxxx01 | 0
|
|
// 0bxxxxxx10 | 1
|
|
// 0bxxxxxx11 | 0
|
|
|
|
const u32 idx = (0x0102 >> ((k[0] & 3) << 2)) & 3;
|
|
|
|
const u32 offset = idx << 4; // * (8 + 8) = 16 (=> offset of 16 u32 = 16 * 4 bytes)
|
|
|
|
u32 x1[8];
|
|
|
|
x1[0] = tmps->xy[offset + 0];
|
|
x1[1] = tmps->xy[offset + 1];
|
|
x1[2] = tmps->xy[offset + 2];
|
|
x1[3] = tmps->xy[offset + 3];
|
|
x1[4] = tmps->xy[offset + 4];
|
|
x1[5] = tmps->xy[offset + 5];
|
|
x1[6] = tmps->xy[offset + 6];
|
|
x1[7] = tmps->xy[offset + 7];
|
|
|
|
u32 y1[8];
|
|
|
|
y1[0] = tmps->xy[offset + 8];
|
|
y1[1] = tmps->xy[offset + 9];
|
|
y1[2] = tmps->xy[offset + 10];
|
|
y1[3] = tmps->xy[offset + 11];
|
|
y1[4] = tmps->xy[offset + 12];
|
|
y1[5] = tmps->xy[offset + 13];
|
|
y1[6] = tmps->xy[offset + 14];
|
|
y1[7] = tmps->xy[offset + 15];
|
|
|
|
u32 z1[8] = { 0 };
|
|
|
|
z1[0] = 1;
|
|
|
|
// do NOT allow to overflow the tmps->xy buffer:
|
|
|
|
u32 final_offset = offset;
|
|
|
|
if (final_offset > 16) final_offset = 16;
|
|
|
|
u32 x2[8];
|
|
|
|
x2[0] = tmps->xy[final_offset + 16];
|
|
x2[1] = tmps->xy[final_offset + 17];
|
|
x2[2] = tmps->xy[final_offset + 18];
|
|
x2[3] = tmps->xy[final_offset + 19];
|
|
x2[4] = tmps->xy[final_offset + 20];
|
|
x2[5] = tmps->xy[final_offset + 21];
|
|
x2[6] = tmps->xy[final_offset + 22];
|
|
x2[7] = tmps->xy[final_offset + 23];
|
|
|
|
u32 y2[8];
|
|
|
|
y2[0] = tmps->xy[final_offset + 24];
|
|
y2[1] = tmps->xy[final_offset + 25];
|
|
y2[2] = tmps->xy[final_offset + 26];
|
|
y2[3] = tmps->xy[final_offset + 27];
|
|
y2[4] = tmps->xy[final_offset + 28];
|
|
y2[5] = tmps->xy[final_offset + 29];
|
|
y2[6] = tmps->xy[final_offset + 30];
|
|
y2[7] = tmps->xy[final_offset + 31];
|
|
|
|
u32 z2[8] = { 0 };
|
|
|
|
z2[0] = 1;
|
|
|
|
// ... then find out the position of the most significant bit
|
|
|
|
int loop_start = idx;
|
|
int loop_end = 255;
|
|
|
|
for (int i = 255; i > 0; i--) // or use: i > idx
|
|
{
|
|
u32 idx = i >> 5; // the current u32 (each consisting of 2^5 = 32 bits) to inspect
|
|
|
|
u32 mask = 1 << (i & 0x1f);
|
|
|
|
if (k[idx] & mask) break; // found it !
|
|
|
|
loop_end--;
|
|
}
|
|
|
|
/*
|
|
* Start
|
|
*/
|
|
|
|
// "just" double until we find the first add (where the first bit is set):
|
|
|
|
for (int pos = loop_start; pos < loop_end; pos++)
|
|
{
|
|
const u32 idx = pos >> 5;
|
|
|
|
const u32 mask = 1 << (pos & 0x1f);
|
|
|
|
if (k[idx] & mask) break;
|
|
|
|
point_double (x2, y2, z2);
|
|
|
|
loop_start++;
|
|
}
|
|
|
|
// for case 0 and 1 we can skip the double (we already did it in the host)
|
|
|
|
if (idx > 1)
|
|
{
|
|
x1[0] = x2[0];
|
|
x1[1] = x2[1];
|
|
x1[2] = x2[2];
|
|
x1[3] = x2[3];
|
|
x1[4] = x2[4];
|
|
x1[5] = x2[5];
|
|
x1[6] = x2[6];
|
|
x1[7] = x2[7];
|
|
|
|
y1[0] = y2[0];
|
|
y1[1] = y2[1];
|
|
y1[2] = y2[2];
|
|
y1[3] = y2[3];
|
|
y1[4] = y2[4];
|
|
y1[5] = y2[5];
|
|
y1[6] = y2[6];
|
|
y1[7] = y2[7];
|
|
|
|
z1[0] = z2[0];
|
|
z1[1] = z2[1];
|
|
z1[2] = z2[2];
|
|
z1[3] = z2[3];
|
|
z1[4] = z2[4];
|
|
z1[5] = z2[5];
|
|
z1[6] = z2[6];
|
|
z1[7] = z2[7];
|
|
|
|
point_double (x2, y2, z2);
|
|
}
|
|
|
|
// main loop (right-to-left binary algorithm):
|
|
|
|
for (int pos = loop_start + 1; pos < loop_end; pos++)
|
|
{
|
|
u32 idx = pos >> 5;
|
|
|
|
u32 mask = 1 << (pos & 0x1f);
|
|
|
|
// add only if needed:
|
|
|
|
if (k[idx] & mask)
|
|
{
|
|
point_add (x1, y1, z1, x2, y2, z2);
|
|
}
|
|
|
|
// always double:
|
|
|
|
point_double (x2, y2, z2);
|
|
}
|
|
|
|
// handle last one:
|
|
|
|
//const u32 final_idx = loop_end >> 5;
|
|
//const u32 mask = 1 << (loop_end & 0x1f);
|
|
|
|
//if (k[final_idx] & mask)
|
|
//{
|
|
// here we just assume that we have at least 2 bits set (an initial one and one additional bit)
|
|
// this could be dangerous/wrong in some situations, but very, very, very unlikely
|
|
point_add (x1, y1, z1, x2, y2, z2);
|
|
//}
|
|
|
|
/*
|
|
* Get the corresponding affine coordinates x/y:
|
|
*
|
|
* Note:
|
|
* x1_affine = x1_jacobian / z1^2 = x1_jacobian * z1_inv^2
|
|
* y1_affine = y1_jacobian / z1^2 = y1_jacobian * z1_inv^2
|
|
*
|
|
*/
|
|
|
|
inv_mod (z1);
|
|
|
|
// z2 is just used as temporary storage to keep the unmodified z1 for calculating z1^3:
|
|
|
|
mul_mod (z2, z1, z1); // z1^2
|
|
mul_mod (x1, x1, z2); // x1_affine
|
|
|
|
mul_mod (z1, z2, z1); // z1^3
|
|
mul_mod (y1, y1, z1); // y1_affine
|
|
|
|
/*
|
|
* output:
|
|
*/
|
|
|
|
// shift by 1 byte (8 bits) to make room and add the parity/sign (for odd/even y):
|
|
|
|
r[8] = (x1[0] << 24);
|
|
r[7] = (x1[0] >> 8) | (x1[1] << 24);
|
|
r[6] = (x1[1] >> 8) | (x1[2] << 24);
|
|
r[5] = (x1[2] >> 8) | (x1[3] << 24);
|
|
r[4] = (x1[3] >> 8) | (x1[4] << 24);
|
|
r[3] = (x1[4] >> 8) | (x1[5] << 24);
|
|
r[2] = (x1[5] >> 8) | (x1[6] << 24);
|
|
r[1] = (x1[6] >> 8) | (x1[7] << 24);
|
|
r[0] = (x1[7] >> 8);
|
|
|
|
const u32 type = 0x02 | (y1[0] & 1); // (note: 0b10 | 0b01 = 0x03)
|
|
|
|
r[0] = r[0] | type << 24; // 0x02 or 0x03
|
|
}
|
|
|
|
DECLSPEC u32 parse_public (secp256k1_t *r, const u32 k[9])
|
|
{
|
|
// verify:
|
|
|
|
const u32 first_byte = k[0] & 0xff;
|
|
|
|
if ((first_byte != '\x02') && (first_byte != '\x03'))
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
// load k into x without the first byte:
|
|
|
|
u32 x[8];
|
|
|
|
x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff);
|
|
x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff);
|
|
x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff);
|
|
x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff);
|
|
x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff);
|
|
x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff);
|
|
x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff);
|
|
x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff);
|
|
|
|
u32 p[8];
|
|
|
|
p[0] = SECP256K1_P0;
|
|
p[1] = SECP256K1_P1;
|
|
p[2] = SECP256K1_P2;
|
|
p[3] = SECP256K1_P3;
|
|
p[4] = SECP256K1_P4;
|
|
p[5] = SECP256K1_P5;
|
|
p[6] = SECP256K1_P6;
|
|
p[7] = SECP256K1_P7;
|
|
|
|
// x must be smaller than p (because of y ^ 2 = x ^ 3 % p)
|
|
|
|
for (int i = 7; i >= 0; i--)
|
|
{
|
|
if (x[i] < p[i]) break;
|
|
if (x[i] > p[i]) return 1;
|
|
}
|
|
|
|
|
|
// get y^2 = x^3 + 7:
|
|
|
|
u32 b[8] = { 0 };
|
|
|
|
b[0] = SECP256K1_B;
|
|
|
|
u32 y[8];
|
|
|
|
mul_mod (y, x, x);
|
|
mul_mod (y, y, x);
|
|
add_mod (y, y, b);
|
|
|
|
// get y = sqrt (y^2):
|
|
|
|
sqrt_mod (y);
|
|
|
|
// check if it's of the correct parity that we want (odd/even):
|
|
|
|
if ((first_byte & 1) != (y[0] & 1))
|
|
{
|
|
// y2 = p - y1 (or y2 = y1 * -1)
|
|
|
|
sub_mod (y, p, y);
|
|
}
|
|
|
|
// get xy:
|
|
|
|
point_get_coords (r, x, y);
|
|
|
|
return 0;
|
|
}
|