diff options
author | Pete Bentley <prb@google.com> | 2019-08-09 14:24:27 +0000 |
---|---|---|
committer | Pete Bentley <prb@google.com> | 2019-08-09 14:24:27 +0000 |
commit | a5c947b7c91bac52eeb5086507b67e52a59ef980 (patch) | |
tree | 3725c3e206175c177a448c50d41ad2c2589a07fa /src/third_party | |
parent | 228bd6249d17f351ea66508b3ec3112ed1cbdf30 (diff) | |
download | boringssl-a5c947b7c91bac52eeb5086507b67e52a59ef980.tar.gz |
Revert "Revert "external/boringssl: Sync to 81080a729af568f7b5fde92b9170cc17065027c9.""
This reverts commit 228bd6249d17f351ea66508b3ec3112ed1cbdf30.
Reason for revert: All fixes submitted for modules affected by the ENGINE_free API change.
Change-Id: I30fafafa13ec0a6390f4a9211fbf3122a8b4865f
Diffstat (limited to 'src/third_party')
-rw-r--r-- | src/third_party/fiat/curve25519_32.h | 8 | ||||
-rw-r--r-- | src/third_party/fiat/curve25519_64.h | 8 | ||||
-rw-r--r-- | src/third_party/fiat/p256.c | 233 | ||||
-rw-r--r-- | src/third_party/fiat/p256_32.h | 8 | ||||
-rw-r--r-- | src/third_party/fiat/p256_64.h | 8 | ||||
-rw-r--r-- | src/third_party/sike/LICENSE | 21 | ||||
-rw-r--r-- | src/third_party/sike/asm/fp-armv8.pl | 915 | ||||
-rwxr-xr-x | src/third_party/sike/asm/fp-x86_64.pl | 1626 | ||||
-rw-r--r-- | src/third_party/sike/asm/fp_generic.c | 181 | ||||
-rw-r--r-- | src/third_party/sike/curve_params.c | 128 | ||||
-rw-r--r-- | src/third_party/sike/fpx.c | 283 | ||||
-rw-r--r-- | src/third_party/sike/fpx.h | 113 | ||||
-rw-r--r-- | src/third_party/sike/isogeny.c | 260 | ||||
-rw-r--r-- | src/third_party/sike/isogeny.h | 49 | ||||
-rw-r--r-- | src/third_party/sike/sike.c | 531 | ||||
-rw-r--r-- | src/third_party/sike/sike.h | 64 | ||||
-rw-r--r-- | src/third_party/sike/sike_test.cc | 251 | ||||
-rw-r--r-- | src/third_party/sike/utils.h | 145 | ||||
-rw-r--r-- | src/third_party/wycheproof_testvectors/kwp_test.txt | 1562 |
19 files changed, 6265 insertions, 129 deletions
diff --git a/src/third_party/fiat/curve25519_32.h b/src/third_party/fiat/curve25519_32.h index 820a5c9b..53772421 100644 --- a/src/third_party/fiat/curve25519_32.h +++ b/src/third_party/fiat/curve25519_32.h @@ -90,7 +90,13 @@ static void fiat_25519_subborrowx_u25(uint32_t* out1, fiat_25519_uint1* out2, fi static void fiat_25519_cmovznz_u32(uint32_t* out1, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) { fiat_25519_uint1 x1 = (!(!arg1)); uint32_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT32_C(0xffffffff)); - uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2)); *out1 = x3; } diff --git a/src/third_party/fiat/curve25519_64.h b/src/third_party/fiat/curve25519_64.h index 23bf361d..7c31ff99 100644 --- a/src/third_party/fiat/curve25519_64.h +++ b/src/third_party/fiat/curve25519_64.h @@ -58,7 +58,13 @@ static void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fi static void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) { fiat_25519_uint1 x1 = (!(!arg1)); uint64_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); - uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. 
+ uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2)); *out1 = x3; } diff --git a/src/third_party/fiat/p256.c b/src/third_party/fiat/p256.c index ebc5de6f..23ec71f9 100644 --- a/src/third_party/fiat/p256.c +++ b/src/third_party/fiat/p256.c @@ -321,7 +321,10 @@ static void point_add(fe x3, fe y3, fe z3, const fe x1, limb_t yneq = fe_nz(r); - if (!xneq && !yneq && z1nz && z2nz) { + limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) & + ~constant_time_is_zero_w(z1nz) & + ~constant_time_is_zero_w(z2nz); + if (is_nontrivial_double) { point_double(x3, y3, z3, x1, y1, z1); return; } @@ -731,98 +734,6 @@ static char get_bit(const uint8_t *in, int i) { return (in[i >> 3] >> (i & 7)) & 1; } -// Interleaved point multiplication using precomputed point multiples: The -// small point multiples 0*P, 1*P, ..., 17*P are in p_pre_comp, the scalar -// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple -// of the generator, using certain (large) precomputed multiples in g_pre_comp. -// Output point (X, Y, Z) is stored in x_out, y_out, z_out. -static void batch_mul(fe x_out, fe y_out, fe z_out, - const uint8_t *p_scalar, const uint8_t *g_scalar, - const fe p_pre_comp[17][3]) { - // set nq to the point at infinity - fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3]; - uint64_t bits; - uint8_t sign, digit; - - // Loop over both scalars msb-to-lsb, interleaving additions of multiples - // of the generator (two in each of the last 32 rounds) and additions of p - // (every 5th round). - - int skip = 1; // save two point operations in the first round - size_t i = p_scalar != NULL ? 
255 : 31; - for (;;) { - // double - if (!skip) { - point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); - } - - // add multiples of the generator - if (g_scalar != NULL && i <= 31) { - // first, look 32 bits upwards - bits = get_bit(g_scalar, i + 224) << 3; - bits |= get_bit(g_scalar, i + 160) << 2; - bits |= get_bit(g_scalar, i + 96) << 1; - bits |= get_bit(g_scalar, i + 32); - // select the point to add, in constant time - select_point(bits, 16, g_pre_comp[1], tmp); - - if (!skip) { - point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - fe_copy(nq[0], tmp[0]); - fe_copy(nq[1], tmp[1]); - fe_copy(nq[2], tmp[2]); - skip = 0; - } - - // second, look at the current position - bits = get_bit(g_scalar, i + 192) << 3; - bits |= get_bit(g_scalar, i + 128) << 2; - bits |= get_bit(g_scalar, i + 64) << 1; - bits |= get_bit(g_scalar, i); - // select the point to add, in constant time - select_point(bits, 16, g_pre_comp[0], tmp); - point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], - tmp[1], tmp[2]); - } - - // do other additions every 5 doublings - if (p_scalar != NULL && i % 5 == 0) { - bits = get_bit(p_scalar, i + 4) << 5; - bits |= get_bit(p_scalar, i + 3) << 4; - bits |= get_bit(p_scalar, i + 2) << 3; - bits |= get_bit(p_scalar, i + 1) << 2; - bits |= get_bit(p_scalar, i) << 1; - bits |= get_bit(p_scalar, i - 1); - ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); - - // select the point to add or subtract, in constant time. - select_point(digit, 17, p_pre_comp, tmp); - fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. 
- fe_cmovznz(tmp[1], sign, tmp[1], ftmp); - - if (!skip) { - point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - fe_copy(nq[0], tmp[0]); - fe_copy(nq[1], tmp[1]); - fe_copy(nq[2], tmp[2]); - skip = 0; - } - } - - if (i == 0) { - break; - } - --i; - } - fe_copy(x_out, nq[0]); - fe_copy(y_out, nq[1]); - fe_copy(z_out, nq[2]); -} - // OPENSSL EC_METHOD FUNCTIONS // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = @@ -890,45 +801,116 @@ static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r, fe_to_generic(&r->Z, z); } -static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar) { +static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { fe p_pre_comp[17][3]; - fe x_out, y_out, z_out; + OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); + // Precompute multiples. + fe_from_generic(p_pre_comp[1][0], &p->X); + fe_from_generic(p_pre_comp[1][1], &p->Y); + fe_from_generic(p_pre_comp[1][2], &p->Z); + for (size_t j = 2; j <= 16; ++j) { + if (j & 1) { + point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], + p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0, + p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], + p_pre_comp[j - 1][2]); + } else { + point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], + p_pre_comp[j / 2][0], p_pre_comp[j / 2][1], + p_pre_comp[j / 2][2]); + } + } + + // Set nq to the point at infinity. + fe nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; + + // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. + int skip = 1; // Save two point operations in the first round. 
+ for (size_t i = 255; i < 256; i--) { + // double + if (!skip) { + point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } - if (p != NULL && p_scalar != NULL) { - // We treat NULL scalars as 0, and NULL points as points at infinity, i.e., - // they contribute nothing to the linear combination. - OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); - // Precompute multiples. - fe_from_generic(p_pre_comp[1][0], &p->X); - fe_from_generic(p_pre_comp[1][1], &p->Y); - fe_from_generic(p_pre_comp[1][2], &p->Z); - for (size_t j = 2; j <= 16; ++j) { - if (j & 1) { - point_add(p_pre_comp[j][0], p_pre_comp[j][1], - p_pre_comp[j][2], p_pre_comp[1][0], - p_pre_comp[1][1], p_pre_comp[1][2], - 0, - p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], - p_pre_comp[j - 1][2]); + // do other additions every 5 doublings + if (i % 5 == 0) { + uint64_t bits = get_bit(scalar->bytes, i + 4) << 5; + bits |= get_bit(scalar->bytes, i + 3) << 4; + bits |= get_bit(scalar->bytes, i + 2) << 3; + bits |= get_bit(scalar->bytes, i + 1) << 2; + bits |= get_bit(scalar->bytes, i) << 1; + bits |= get_bit(scalar->bytes, i - 1); + uint8_t sign, digit; + ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); + + // select the point to add or subtract, in constant time. + select_point(digit, 17, (const fe(*)[3])p_pre_comp, tmp); + fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. + fe_cmovznz(tmp[1], sign, tmp[1], ftmp); + + if (!skip) { + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, + tmp[0], tmp[1], tmp[2]); } else { - point_double(p_pre_comp[j][0], p_pre_comp[j][1], - p_pre_comp[j][2], p_pre_comp[j / 2][0], - p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); + fe_copy(nq[0], tmp[0]); + fe_copy(nq[1], tmp[1]); + fe_copy(nq[2], tmp[2]); + skip = 0; } } } - batch_mul(x_out, y_out, z_out, - (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL, - g_scalar != NULL ? 
g_scalar->bytes : NULL, - (const fe (*) [3])p_pre_comp); + fe_to_generic(&r->X, nq[0]); + fe_to_generic(&r->Y, nq[1]); + fe_to_generic(&r->Z, nq[2]); +} + +static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, + EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + // Set nq to the point at infinity. + fe nq[3] = {{0}, {0}, {0}}, tmp[3]; + + int skip = 1; // Save two point operations in the first round. + for (size_t i = 31; i < 32; i--) { + if (!skip) { + point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // First, look 32 bits upwards. + uint64_t bits = get_bit(scalar->bytes, i + 224) << 3; + bits |= get_bit(scalar->bytes, i + 160) << 2; + bits |= get_bit(scalar->bytes, i + 96) << 1; + bits |= get_bit(scalar->bytes, i + 32); + // Select the point to add, in constant time. + select_point(bits, 16, g_pre_comp[1], tmp); + + if (!skip) { + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], + tmp[1], tmp[2]); + } else { + fe_copy(nq[0], tmp[0]); + fe_copy(nq[1], tmp[1]); + fe_copy(nq[2], tmp[2]); + skip = 0; + } + + // Second, look at the current position. + bits = get_bit(scalar->bytes, i + 192) << 3; + bits |= get_bit(scalar->bytes, i + 128) << 2; + bits |= get_bit(scalar->bytes, i + 64) << 1; + bits |= get_bit(scalar->bytes, i); + // Select the point to add, in constant time. 
+ select_point(bits, 16, g_pre_comp[0], tmp); + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], + tmp[1], tmp[2]); + } - fe_to_generic(&r->X, x_out); - fe_to_generic(&r->Y, y_out); - fe_to_generic(&r->Z, z_out); + fe_to_generic(&r->X, nq[0]); + fe_to_generic(&r->Y, nq[1]); + fe_to_generic(&r->Z, nq[2]); } static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, @@ -1066,7 +1048,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) { ec_GFp_nistp256_point_get_affine_coordinates; out->add = ec_GFp_nistp256_add; out->dbl = ec_GFp_nistp256_dbl; - out->mul = ec_GFp_nistp256_points_mul; + out->mul = ec_GFp_nistp256_point_mul; + out->mul_base = ec_GFp_nistp256_point_mul_base; out->mul_public = ec_GFp_nistp256_point_mul_public; out->felem_mul = ec_GFp_mont_felem_mul; out->felem_sqr = ec_GFp_mont_felem_sqr; diff --git a/src/third_party/fiat/p256_32.h b/src/third_party/fiat/p256_32.h index faaa0b04..638eb5d9 100644 --- a/src/third_party/fiat/p256_32.h +++ b/src/third_party/fiat/p256_32.h @@ -77,7 +77,13 @@ static void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, ui static void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) { fiat_p256_uint1 x1 = (!(!arg1)); uint32_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff)); - uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. 
+ uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2)); *out1 = x3; } diff --git a/src/third_party/fiat/p256_64.h b/src/third_party/fiat/p256_64.h index 8e449c6b..7d97e0a0 100644 --- a/src/third_party/fiat/p256_64.h +++ b/src/third_party/fiat/p256_64.h @@ -79,7 +79,13 @@ static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, ui static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { fiat_p256_uint1 x1 = (!(!arg1)); uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); - uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2)); *out1 = x3; } diff --git a/src/third_party/sike/LICENSE b/src/third_party/sike/LICENSE new file mode 100644 index 00000000..5cf7c8db --- /dev/null +++ b/src/third_party/sike/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE diff --git a/src/third_party/sike/asm/fp-armv8.pl b/src/third_party/sike/asm/fp-armv8.pl new file mode 100644 index 00000000..ce19d809 --- /dev/null +++ b/src/third_party/sike/asm/fp-armv8.pl @@ -0,0 +1,915 @@ +#! /usr/bin/env perl +# +# April 2019 +# +# Abstract: field arithmetic in aarch64 assembly for SIDH/p434 + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../../crypto/perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; +*STDOUT=*OUT; + +$PREFIX="sike"; + +$code.=<<___; +.section .rodata + +# p434 x 2 +.Lp434x2: + .quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF + .quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47 + .quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688 + +# p434 + 1 +.Lp434p1: + .quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3 + .quad 0x6CFC5FD681C52056, 0x0002341F27177344 + +.text +___ + +# Computes C0-C2 = A0 * (B0-B1) +# Inputs remain intact +sub mul64x128 { + my ($A0,$B0,$B1,$C0,$C1,$C2,$T0,$T1)=@_; + my $body=<<___; + mul $T1, $A0, $B0 + umulh $B0, $A0, $B0 + adds $C0, $C0, $C2 + adc $C1, $C1, xzr + + mul $T0, $A0, $B1 + umulh $B1, $A0, $B1 + adds $C0, $C0, $T1 + adcs $C1, $C1, $B0 + adc $C2, xzr, xzr + + adds $C1, $C1, $T0 + adc $C2, $C2, $B1 +___ + return $body; +} + +# Computes C0-C4 = A0 * (B0-B3) +# Inputs remain intact +sub mul64x256 { + my 
($A0,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$T0,$T1,$T2)=@_; + my $body=<<___; + mul $C0, $A0, $B0 // C0 + umulh $T0, $A0, $B0 + + mul $C1, $A0, $B1 + umulh $T1, $A0, $B1 + adds $C1, $C1, $T0 // C1 + adc $T0, xzr, xzr + + mul $C2, $A0, $B2 + umulh $T2, $A0, $B2 + adds $T1, $T0, $T1 + adcs $C2, $C2, $T1 // C2 + adc $T0, xzr, xzr + + mul $C3, $A0, $B3 + umulh $C4, $A0, $B3 + adds $T2, $T0, $T2 + adcs $C3, $C3, $T2 // C3 + adc $C4, $C4, xzr // C4 +___ + return $body; +} + +# Computes C0-C4 = (A0-A1) * (B0-B3) +# Inputs remain intact +sub mul128x256 { + my ($A0,$A1,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_; + my $body=<<___; + mul $C0, $A0, $B0 // C0 + umulh $C3, $A0, $B0 + + mul $C1, $A0, $B1 + umulh $C2, $A0, $B1 + + mul $T0, $A1, $B0 + umulh $T1, $A1, $B0 + adds $C1, $C1, $C3 + adc $C2, $C2, xzr + + mul $T2, $A0, $B2 + umulh $T3, $A0, $B2 + adds $C1, $C1, $T0 // C1 + adcs $C2, $C2, $T1 + adc $C3, xzr, xzr + + mul $T0, $A1, $B1 + umulh $T1, $A1, $B1 + adds $C2, $C2, $T2 + adcs $C3, $C3, $T3 + adc $C4, xzr, xzr + + mul $T2, $A0, $B3 + umulh $T3, $A0, $B3 + adds $C2, $C2, $T0 // C2 + adcs $C3, $C3, $T1 + adc $C4, $C4, xzr + + mul $T0, $A1, $B2 + umulh $T1, $A1, $B2 + adds $C3, $C3, $T2 + adcs $C4, $C4, $T3 + adc $C5, xzr, xzr + + mul $T2, $A1, $B3 + umulh $T3, $A1, $B3 + adds $C3, $C3, $T0 // C3 + adcs $C4, $C4, $T1 + adc $C5, $C5, xzr + adds $C4, $C4, $T2 // C4 + adc $C5, $C5, $T3 // C5 + +___ + return $body; +} + +# Computes C0-C5 = (A0-A2) * (B0-B2) +# Inputs remain intact +sub mul192 { + my ($A0,$A1,$A2,$B0,$B1,$B2,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_; + my $body=<<___; + + // A0 * B0 + mul $C0, $A0, $B0 // C0 + umulh $C3, $A0, $B0 + + // A0 * B1 + mul $C1, $A0, $B1 + umulh $C2, $A0, $B1 + + // A1 * B0 + mul $T0, $A1, $B0 + umulh $T1, $A1, $B0 + adds $C1, $C1, $C3 + adc $C2, $C2, xzr + + // A0 * B2 + mul $T2, $A0, $B2 + umulh $T3, $A0, $B2 + adds $C1, $C1, $T0 // C1 + adcs $C2, $C2, $T1 + adc $C3, xzr, xzr + + // A2 * B0 + mul $T0, $A2, $B0 + 
umulh $C4, $A2, $B0 + adds $C2, $C2, $T2 + adcs $C3, $C3, $C4 + adc $C4, xzr, xzr + + // A1 * B1 + mul $T2, $A1, $B1 + umulh $T1, $A1, $B1 + adds $C2, $C2, $T0 + adcs $C3, $C3, $T3 + adc $C4, $C4, xzr + + // A1 * B2 + mul $T0, $A1, $B2 + umulh $T3, $A1, $B2 + adds $C2, $C2, $T2 // C2 + adcs $C3, $C3, $T1 + adc $C4, $C4, xzr + + // A2 * B1 + mul $T2, $A2, $B1 + umulh $T1, $A2, $B1 + adds $C3, $C3, $T0 + adcs $C4, $C4, $T3 + adc $C5, xzr, xzr + + // A2 * B2 + mul $T0, $A2, $B2 + umulh $T3, $A2, $B2 + adds $C3, $C3, $T2 // C3 + adcs $C4, $C4, $T1 + adc $C5, $C5, xzr + + adds $C4, $C4, $T0 // C4 + adc $C5, $C5, $T3 // C5 +___ + return $body; +} +sub mul256_karatsuba { + my ($M,$A0,$A1,$A2,$A3,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7,$T0,$T1)=@_; + # (AH+AL) x (BH+BL), low part + my $mul_low=&mul64x128($A1, $C6, $T1, $C3, $C4, $C5, $C7, $A0); + # AL x BL + my $mul_albl=&mul64x128($A1, $B0, $B1, $C1, $T1, $C7, $C6, $A0); + # AH x BH + my $mul_ahbh=&mul64x128($A3, $B2, $B3, $A1, $C6, $B0, $B1, $A2); + my $body=<<___; + // A0-A1 <- AH + AL, T0 <- mask + adds $A0, $A0, $A2 + adcs $A1, $A1, $A3 + adc $T0, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds $C6, $B0, $B2 + adcs $T1, $B1, $B3 + adc $C7, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub $C2, xzr, $T0 + sub $C3, xzr, $C7 + and $C0, $C6, $C2 + and $C1, $T1, $C2 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and $C4, $A0, $C3 + and $C5, $A1, $C3 + mul $C2, $A0, $C6 + mul $C3, $A0, $T1 + and $T0, $T0, $C7 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds $C0, $C4, $C0 + umulh $C4, $A0, $T1 + adcs $C1, $C5, $C1 + umulh $C5, $A0, $C6 + adc $T0, $T0, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + $mul_low + ldp $A0, $A1, [$M,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds $C4, $C0, $C4 + umulh $C7, $A0, $B0 + umulh $T1, $A0, $B1 + adcs $C5, $C1, $C5 + mul $C0, $A0, $B0 + mul $C1, $A0, $B1 + adc $T0, $T0, xzr + + // C0-C1, T1, C7 <- AL x BL + $mul_albl + + // C2-C5, T0 <- (AH+AL) x 
(BH+BL) - ALxBL + mul $A0, $A2, $B2 + umulh $B0, $A2, $B2 + subs $C2, $C2, $C0 + sbcs $C3, $C3, $C1 + sbcs $C4, $C4, $T1 + mul $A1, $A2, $B3 + umulh $C6, $A2, $B3 + sbcs $C5, $C5, $C7 + sbc $T0, $T0, xzr + + // A0, A1, C6, B0 <- AH x BH + $mul_ahbh + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs $C2, $C2, $A0 + sbcs $C3, $C3, $A1 + sbcs $C4, $C4, $C6 + sbcs $C5, $C5, $B0 + sbc $T0, $T0, xzr + + adds $C2, $C2, $T1 + adcs $C3, $C3, $C7 + adcs $C4, $C4, $A0 + adcs $C5, $C5, $A1 + adcs $C6, $T0, $C6 + adc $C7, $B0, xzr +___ + return $body; +} + +# 512-bit integer multiplication using Karatsuba (two levels), +# Comba (lower level). +# Operation: c [x2] = a [x0] * b [x1] +sub mul { + # (AH+AL) x (BH+BL), low part + my $mul_kc_low=&mul256_karatsuba( + "x2", # M0 + "x3","x4","x5","x6", # A0-A3 + "x10","x11","x12","x13", # B0-B3 + "x8","x9","x19","x20","x21","x22","x23","x24", # C0-C7 + "x25","x26"); # TMP + # AL x BL + my $mul_albl=&mul256_karatsuba( + "x0", # M0f + "x3","x4","x5","x6", # A0-A3 + "x10","x11","x12","x13", # B0-B3 + "x21","x22","x23","x24","x25","x26","x27","x28",# C0-C7 + "x8","x9"); # TMP + # AH x BH + my $mul_ahbh=&mul192( + "x3","x4","x5", # A0-A2 + "x10","x11","x12", # B0-B2 + "x21","x22","x23","x24","x25","x26", # C0-C5 + "x8","x9","x27","x28"); # TMP + + my $body=<<___; + .global ${PREFIX}_mpmul + .align 4 + ${PREFIX}_mpmul: + stp x29, x30, [sp,#-96]! 
+ add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldr x9, [x0,#48] + ldp x10, x11, [x1,#0] + ldp x12, x13, [x1,#16] + ldp x14, x15, [x1,#32] + ldr x16, [x1,#48] + + // x3-x7 <- AH + AL, x7 <- carry + adds x3, x3, x7 + adcs x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, xzr + adc x7, xzr, xzr + + // x10-x13 <- BH + BL, x8 <- carry + adds x10, x10, x14 + adcs x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, xzr + adc x8, xzr, xzr + + // x9 <- combined carry + and x9, x7, x8 + // x7-x8 <- mask + sub x7, xzr, x7 + sub x8, xzr, x8 + + // x15-x19 <- masked (BH + BL) + and x14, x10, x7 + and x15, x11, x7 + and x16, x12, x7 + and x17, x13, x7 + + // x20-x23 <- masked (AH + AL) + and x20, x3, x8 + and x21, x4, x8 + and x22, x5, x8 + and x23, x6, x8 + + // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1 + adds x14, x14, x20 + adcs x15, x15, x21 + adcs x16, x16, x22 + adcs x17, x17, x23 + adc x7, x9, xzr + + // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part + stp x3, x4, [x2,#0] + $mul_kc_low + + // x15-x19, x7 <- (AH+AL) x (BH+BL), final step + adds x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x23 + adcs x17, x17, x24 + adc x7, x7, xzr + + // Load AL + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + // Load BL + ldp x10, x11, [x1,#0] + ldp x12, x13, [x1,#16] + + // Temporarily store x8 in x2 + stp x8, x9, [x2,#0] + // x21-x28 <- AL x BL + $mul_albl + // Restore x8 + ldp x8, x9, [x2,#0] + + // x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x19, x19, x23 + sbcs x20, x20, x24 + sbcs x14, x14, x25 + sbcs x15, x15, x26 + sbcs x16, x16, x27 + sbcs x17, x17, x28 + sbc x7, x7, xzr + + // Store ALxBL, low + stp x21, x22, [x2] + stp x23, x24, [x2,#16] + + // Load AH + ldp x3, x4, [x0,#32] + ldr x5, [x0,#48] + // Load BH + ldp x10, x11, [x1,#32] + ldr x12, 
[x1,#48] + + adds x8, x8, x25 + adcs x9, x9, x26 + adcs x19, x19, x27 + adcs x20, x20, x28 + adc x1, xzr, xzr + + add x0, x0, #32 + // Temporarily store x8,x9 in x2 + stp x8,x9, [x2,#32] + // x21-x28 <- AH x BH + $mul_ahbh + // Restore x8,x9 + ldp x8,x9, [x2,#32] + + neg x1, x1 + + // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x19, x19, x23 + sbcs x20, x20, x24 + sbcs x14, x14, x25 + sbcs x15, x15, x26 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x7, x7, xzr + + // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low + stp x8, x9, [x2,#32] + stp x19, x20, [x2,#48] + + adds x1, x1, #1 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x23 + adcs x17, x17, x24 + adcs x25, x7, x25 + adc x26, x26, xzr + + stp x14, x15, [x2,#64] + stp x16, x17, [x2,#80] + stp x25, x26, [x2,#96] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#96 + ret +___ + return $body; +} +$code.=&mul(); + +# Montgomery reduction +# Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 +# Operation: mc [x1] = ma [x0] +# NOTE: ma=mc is not allowed +sub rdc { + my $mul01=&mul128x256( + "x2","x3", # A0-A1 + "x23","x24","x25","x26", # B0-B3 + "x4","x5","x6","x7","x8","x9", # C0-C5 + "x10","x11","x27","x28"); # TMP + my $mul23=&mul128x256( + "x2","x10", # A0-A1 + "x23","x24","x25","x26", # B0-B3 + "x4","x5","x6","x7","x8","x9", # C0-C5 + "x0","x3","x27","x28"); # TMP + my $mul45=&mul128x256( + "x11","x12", # A0-A1 + "x23","x24","x25","x26", # B0-B3 + "x4","x5","x6","x7","x8","x9", # C0-C5 + "x10","x3","x27","x28"); # TMP + my $mul67=&mul64x256( + "x13", # A0 + "x23","x24","x25","x26", # B0-B3 + "x4","x5","x6","x7","x8", # C0-C4 + "x10","x27","x28"); # TMP + my $body=<<___; + .global ${PREFIX}_fprdc + .align 4 + ${PREFIX}_fprdc: + stp x29, x30, [sp, #-96]! 
+ add x29, sp, xzr + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + + ldp x2, x3, [x0,#0] // a[0-1] + + // Load the prime constant + adrp x26, :pg_hi21:.Lp434p1 + add x26, x26, :lo12:.Lp434p1 + ldp x23, x24, [x26, #0x0] + ldp x25, x26, [x26,#0x10] + + // a[0-1] * p434+1 + $mul01 + + ldp x10, x11, [x0, #0x18] + ldp x12, x13, [x0, #0x28] + ldp x14, x15, [x0, #0x38] + ldp x16, x17, [x0, #0x48] + ldp x19, x20, [x0, #0x58] + ldr x21, [x0, #0x68] + + adds x10, x10, x4 + adcs x11, x11, x5 + adcs x12, x12, x6 + adcs x13, x13, x7 + adcs x14, x14, x8 + adcs x15, x15, x9 + adcs x22, x16, xzr + adcs x17, x17, xzr + adcs x19, x19, xzr + adcs x20, x20, xzr + adc x21, x21, xzr + + ldr x2, [x0,#0x10] // a[2] + // a[2-3] * p434+1 + $mul23 + + adds x12, x12, x4 + adcs x13, x13, x5 + adcs x14, x14, x6 + adcs x15, x15, x7 + adcs x16, x22, x8 + adcs x17, x17, x9 + adcs x22, x19, xzr + adcs x20, x20, xzr + adc x21, x21, xzr + + $mul45 + adds x14, x14, x4 + adcs x15, x15, x5 + adcs x16, x16, x6 + adcs x17, x17, x7 + adcs x19, x22, x8 + adcs x20, x20, x9 + adc x22, x21, xzr + + stp x14, x15, [x1, #0x0] // C0, C1 + + $mul67 + adds x16, x16, x4 + adcs x17, x17, x5 + adcs x19, x19, x6 + adcs x20, x20, x7 + adc x21, x22, x8 + + str x16, [x1, #0x10] + stp x17, x19, [x1, #0x18] + stp x20, x21, [x1, #0x28] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#96 + ret +___ +} +$code.=&rdc(); + +# Field addition +# Operation: c [x2] = a [x0] + b [x1] +$code.=<<___; + .global ${PREFIX}_fpadd + .align 4 + ${PREFIX}_fpadd: + stp x29,x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldr x9, [x0,#48] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + ldr x17, [x1,#48] + + // Add a + b + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + adcs x7, x7, x15 + adcs x8, x8, x16 + adc x9, x9, x17 + + // Subtract 2xp434 + adrp x17, :pg_hi21:.Lp434x2 + add x17, x17, :lo12:.Lp434x2 + ldp x11, x12, [x17, #0] + ldp x13, x14, [x17, #16] + ldp x15, x16, [x17, #32] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x12 + sbcs x6, x6, x13 + sbcs x7, x7, x14 + sbcs x8, x8, x15 + sbcs x9, x9, x16 + sbc x0, xzr, xzr // x0 can be reused now + + // Add 2xp434 anded with the mask in x0 + and x11, x11, x0 + and x12, x12, x0 + and x13, x13, x0 + and x14, x14, x0 + and x15, x15, x0 + and x16, x16, x0 + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + adcs x7, x7, x14 + adcs x8, x8, x15 + adc x9, x9, x16 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + str x9, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +___ + +# Field subtraction +# Operation: c [x2] = a [x0] - b [x1] +$code.=<<___; + .global ${PREFIX}_fpsub + .align 4 + ${PREFIX}_fpsub: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldr x9, [x0,#48] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + ldr x17, [x1,#48] + + // Subtract a - b + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + sbcs x7, x7, x15 + sbcs x8, x8, x16 + sbcs x9, x9, x17 + sbc x0, xzr, xzr + + // Add 2xp434 anded with the mask in x0 + adrp x17, :pg_hi21:.Lp434x2 + add x17, x17, :lo12:.Lp434x2 + + // First half + ldp x11, x12, [x17, #0] + ldp x13, x14, [x17, #16] + ldp x15, x16, [x17, #32] + + // Add 2xp434 anded with the mask in x0 + and x11, x11, x0 + and x12, x12, x0 + and x13, x13, x0 + and x14, x14, x0 + and x15, x15, x0 + and x16, x16, x0 + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + adcs x7, x7, x14 + adcs x8, x8, x15 + adc x9, x9, x16 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + str x9, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +___ + +# 434-bit multiprecision addition +# Operation: c [x2] = a [x0] + b [x1] +$code.=<<___; + .global ${PREFIX}_mpadd_asm + .align 4 + ${PREFIX}_mpadd_asm: + stp x29, x30, [sp,#-16]! + add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldr x9, [x0,#48] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + ldr x17, [x1,#48] + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + adcs x7, x7, x15 + adcs x8, x8, x16 + adc x9, x9, x17 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + str x9, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +___ + +# 2x434-bit multiprecision subtraction +# Operation: c [x2] = a [x0] - b [x1]. +# Returns borrow mask +$code.=<<___; + .global ${PREFIX}_mpsubx2_asm + .align 4 + ${PREFIX}_mpsubx2_asm: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbcs x9, x9, x13 + sbcs x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x3, x4, [x0,#64] + ldp x5, x6, [x0,#80] + ldp x11, x12, [x1,#64] + ldp x13, x14, [x1,#80] + sbcs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#96] + ldp x11, x12, [x1,#96] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbc x0, xzr, xzr + + stp x3, x4, [x2,#64] + stp x5, x6, [x2,#80] + stp x7, x8, [x2,#96] + + ldp x29, x30, [sp],#16 + ret +___ + + +# Double 2x434-bit multiprecision subtraction +# Operation: c [x2] = c [x2] - a [x0] - b [x1] +$code.=<<___; + .global ${PREFIX}_mpdblsubx2_asm + .align 4 + ${PREFIX}_mpdblsubx2_asm: + stp x29, x30, [sp, #-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x2, #0] + ldp x5, x6, [x2,#16] + ldp x7, x8, [x2,#32] + + ldp x11, x12, [x0, #0] + ldp x13, x14, [x0,#16] + ldp x15, x16, [x0,#32] + + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + sbcs x7, x7, x15 + sbcs x8, x8, x16 + + // x9 stores carry + adc x9, xzr, xzr + + ldp x11, x12, [x1, #0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + sbcs x7, x7, x15 + sbcs x8, x8, x16 + adc x9, x9, xzr + + stp x3, x4, [x2, #0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + + ldp x3, x4, [x2,#48] + ldp x5, x6, [x2,#64] + ldp x7, x8, [x2,#80] + + ldp x11, x12, [x0,#48] + ldp x13, x14, [x0,#64] + ldp x15, x16, [x0,#80] + + // x9 = 2 - x9 + neg x9, x9 + add x9, x9, #2 + + subs x3, x3, x9 + sbcs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + sbcs x7, x7, x15 + sbcs x8, x8, x16 + adc x9, xzr, xzr + + ldp x11, x12, [x1,#48] + ldp x13, x14, [x1,#64] + ldp x15, x16, [x1,#80] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + sbcs x7, x7, x15 + sbcs x8, x8, x16 + adc x9, x9, xzr + + stp x3, x4, [x2,#48] + stp x5, x6, [x2,#64] + stp x7, x8, [x2,#80] + + ldp x3, x4, [x2,#96] + ldp x11, x12, [x0,#96] + ldp x13, x14, [x1,#96] + + // x9 = 2 - x9 + neg x9, x9 + add x9, x9, #2 + + subs x3, x3, x9 + sbcs x3, x3, x11 + sbcs x4, x4, x12 + subs x3, x3, x13 + sbc x4, x4, x14 + stp x3, x4, [x2,#96] + + ldp x29, x30, [sp],#16 + ret +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + print $_,"\n"; +} + +close STDOUT; diff --git a/src/third_party/sike/asm/fp-x86_64.pl b/src/third_party/sike/asm/fp-x86_64.pl new file mode 100755 index 00000000..cffde1a8 --- /dev/null +++ b/src/third_party/sike/asm/fp-x86_64.pl @@ -0,0 +1,1626 @@ +#! 
/usr/bin/env perl +# +# April 2019 +# +# Abstract: field arithmetic in x64 assembly for SIDH/p434 + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; +*STDOUT=*OUT; + +$PREFIX="sike"; +$bmi2_adx = 1; + +$code.=<<___; +.text + +# p434 x 2 +.Lp434x2: +.quad 0xFFFFFFFFFFFFFFFE +.quad 0xFFFFFFFFFFFFFFFF +.quad 0xFB82ECF5C5FFFFFF +.quad 0xF78CB8F062B15D47 +.quad 0xD9F8BFAD038A40AC +.quad 0x0004683E4E2EE688 + +# p434 + 1 +.Lp434p1: +.quad 0xFDC1767AE3000000 +.quad 0x7BC65C783158AEA3 +.quad 0x6CFC5FD681C52056 +.quad 0x0002341F27177344 + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +___ + +# Jump to alternative implemenatation provided as an +# argument in case CPU supports ADOX/ADCX and MULX instructions. +sub alt_impl { + $jmp_func = shift; + + $body=<<___; + lea OPENSSL_ia32cap_P(%rip), %rcx + mov 8(%rcx), %rcx + and \$0x80100, %ecx + cmp \$0x80100, %ecx + je $jmp_func + +___ + return $body +} + +# Performs schoolbook multiplication of 2 192-bit numbers. Uses +# MULX instruction. Result is stored in 192 bits pointed by $DST. 
sub mul192 {
  # Usage: mul192(OFF_A, BASE_A, OFF_B, BASE_B, OFF_DST, BASE_DST, T0..T6)
  #
  # Returns assembly text for a 3x3-word schoolbook multiplication: the
  # three 64-bit words at OFF_A(BASE_A) times the three words at
  # OFF_B(BASE_B). The six-word (384-bit) product is stored at
  # OFF_DST(BASE_DST). T0..T6 name seven scratch registers.
  #
  # The generated code also uses %rdx (implicit MULX multiplicand) and
  # %rax (zeroed by each `xor`, which clears CF and OF as well). Partial
  # products are accumulated on two independent carry chains: ADOX
  # propagates through OF and ADCX through CF.
  my ($idxM0,$M0,$idxM1,$M1,$idxDST,$DST,$T0,$T1,$T2,$T3,$T4,$T5,$T6)=@_;
  my ($ML0,$ML8,$ML16)=map("$idxM0+$_($M0)",(0,8,16));
  my ($MR0,$MR8,$MR16)=map("$idxM1+$_($M1)",(0,8,16));
  my ($D0,$D1,$D2,$D3,$D4,$D5)=map("$idxDST+$_($DST)",(0,8,16,24,32,40));

  # Lexical, so that the template does not leak into (or get clobbered
  # through) a package-global $body shared with other generators.
  my $body=<<___;
  mov $ML0, %rdx
  mulx $MR0, $T1, $T0 # T0:T1 = A0*B0
  mov $T1, $D0 # DST0
  mulx $MR8, $T2, $T1 # T1:T2 = A0*B1
  xor %rax, %rax
  adox $T2, $T0
  mulx $MR16,$T3, $T2 # T2:T3 = A0*B2
  adox $T3, $T1

  mov $ML8, %rdx
  mulx $MR0, $T4, $T3 # T3:T4 = A1*B0
  adox %rax, $T2
  xor %rax, %rax

  mulx $MR8, $T6, $T5 # T5:T6 = A1*B1
  adox $T0, $T4
  mov $T4, $D1 # DST1
  adcx $T6, $T3

  mulx $MR16,$T0, $T6 # T6:T0 = A1*B2
  adox $T1, $T3
  adcx $T0, $T5
  adcx %rax, $T6
  adox $T2, $T5

  mov $ML16,%rdx
  mulx $MR0, $T0, $T1 # T1:T0 = A2*B0
  adox %rax, $T6
  xor %rax, %rax

  mulx $MR8, $T2, $T4 # T4:T2 = A2*B1
  adox $T3, $T0
  mov $T0, $D2 # DST2
  adcx $T5, $T1

  mulx $MR16,$T3, $T0 # T0:T3 = A2*B2
  adcx $T6, $T4
  adcx %rax, $T0
  adox $T2, $T1
  adox $T4, $T3
  adox %rax, $T0
  mov $T1, $D3 # DST3
  mov $T3, $D4 # DST4
  mov $T0, $D5 # DST5

___
  return $body;
}

# Performs schoolbook multiplication of 2 256-bit numbers. Uses
# MULX instruction. The 512-bit result is stored in the eight 64-bit
# words pointed to by $DST.
sub mul256 {
  # Usage: mul256(OFF_A, BASE_A, OFF_B, BASE_B, OFF_DST, BASE_DST, T0..T9)
  #
  # Returns assembly text for a 4x4-word schoolbook multiplication: the
  # four 64-bit words at OFF_A(BASE_A) times the four words at
  # OFF_B(BASE_B). The eight-word (512-bit) product is stored at
  # OFF_DST(BASE_DST). T0..T9 name ten scratch registers.
  #
  # The generated code also uses %rdx (implicit MULX multiplicand) and
  # %rax (zeroed by each `xor`, which clears CF and OF as well). ADOX
  # and ADCX carry through OF and CF respectively, giving two
  # interleaved carry chains per row.
  #
  # Store order: DST0..DST3 are each written only after the matching
  # A word has been loaded into %rdx, and DST4..DST7 are written after
  # the last multiplication.
  my ($idxM0,$M0,$idxM1,$M1,$idxDST,$DST,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;
  my ($ML0,$ML8,$ML16,$ML24)=map("$idxM0+$_($M0)",(0,8,16,24));
  my ($MR0,$MR8,$MR16,$MR24)=map("$idxM1+$_($M1)",(0,8,16,24));
  my ($D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7)=map("$idxDST+$_($DST)",(0,8,16,24,32,40,48,56));

  # Lexical, so that the template does not leak into (or get clobbered
  # through) a package-global $body shared with other generators.
  my $body=<<___;
  mov $ML0, %rdx
  mulx $MR0, $T1, $T0 # T0:T1 = A0*B0
  mov $T1, $D0 # DST0_final
  mulx $MR8, $T2, $T1 # T1:T2 = A0*B1
  xor %rax, %rax
  adox $T2, $T0
  mulx $MR16,$T3, $T2 # T2:T3 = A0*B2
  adox $T3, $T1
  mulx $MR24,$T4, $T3 # T3:T4 = A0*B3
  adox $T4, $T2

  mov $ML8, %rdx
  mulx $MR0, $T4, $T5 # T5:T4 = A1*B0
  adox %rax, $T3
  xor %rax, %rax
  mulx $MR8, $T7, $T6 # T6:T7 = A1*B1
  adox $T0, $T4
  mov $T4, $D1 # DST1_final
  adcx $T7, $T5
  mulx $MR16,$T8, $T7 # T7:T8 = A1*B2
  adcx $T8, $T6
  adox $T1, $T5
  mulx $MR24,$T9, $T8 # T8:T9 = A1*B3
  adcx $T9, $T7
  adcx %rax, $T8
  adox $T2, $T6

  mov $ML16,%rdx
  mulx $MR0, $T0, $T1 # T1:T0 = A2*B0
  adox $T3, $T7
  adox %rax, $T8
  xor %rax, %rax
  mulx $MR8, $T3, $T2 # T2:T3 = A2*B1
  adox $T5, $T0
  mov $T0, $D2 # DST2_final
  adcx $T3, $T1
  mulx $MR16,$T4, $T3 # T3:T4 = A2*B2
  adcx $T4, $T2
  adox $T6, $T1
  mulx $MR24,$T9, $T4 # T4:T9 = A2*B3
  adcx $T9, $T3
  adcx %rax, $T4

  adox $T7, $T2
  adox $T8, $T3
  adox %rax, $T4

  mov $ML24,%rdx
  mulx $MR0, $T0, $T5 # T5:T0 = A3*B0
  xor %rax, %rax
  mulx $MR8, $T7, $T6 # T6:T7 = A3*B1
  adcx $T7, $T5
  adox $T0, $T1
  mulx $MR16, $T8, $T7 # T7:T8 = A3*B2
  adcx $T8, $T6
  adox $T5, $T2
  mulx $MR24, $T9, $T8 # T8:T9 = A3*B3
  adcx $T9, $T7
  adcx %rax, $T8
  adox $T6, $T3
  adox $T7, $T4
  adox %rax, $T8
  mov $T1, $D3 # DST3_final
  mov $T2, $D4 # DST4_final
  mov $T3, $D5 # DST5_final
  mov $T4, $D6 # DST6_final
  mov $T8, $D7 # DST7_final

___
  return $body;
}

# Performs schoolbook multiplication of 64-bit with 256-bit
# number.
sub mul64x256 {
  # Usage: mul64x256(OFF, BASE, M1, T0, T1, T2, T3, T4, T5)
  #
  # Returns assembly text that multiplies the 64-bit word at OFF(BASE)
  # by the four-word number addressed by M1. M1 is spliced in as
  # "0+M1", "8+M1", "16+M1" and "24+M1", so it must be an operand that
  # accepts a leading displacement. The five-word product C0..C4 is
  # left in T0..T4; T5 holds the 64-bit multiplier throughout.
  #
  # Uses the legacy one-operand MUL, so %rax and %rdx are overwritten
  # by every partial product.
  my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5)=@_;
  # `.=` on a freshly declared lexical acts as a plain assignment here.
  my $body.=<<___;
  mov $idxM0($M0), $T5

  xor $T2, $T2
  mov 0+$M1, %rax
  mul $T5
  mov %rax, $T0 # C0
  mov %rdx, $T1

  xor $T3, $T3
  mov 8+$M1, %rax
  mul $T5
  add %rax, $T1 # C1
  adc %rdx, $T2

  xor $T4, $T4
  mov 16+$M1, %rax
  mul $T5
  add %rax, $T2 # C2
  adc %rdx, $T3

  mov 24+$M1, %rax
  mul $T5
  add %rax, $T3 # C3
  adc %rdx, $T4 # C4
___
  return $body;
}

# Performs schoolbook multiplication of 64-bit with 256-bit
# number. Uses MULX and ADOX instructions.
sub mulx64x256 {
  # Usage: mulx64x256(OFF, BASE, M1, T0, T1, T2, T3, T4, T5)
  #
  # Same contract as mul64x256: the product C0..C4 ends up in T0..T4.
  # Here the multiplier is loaded into %rdx (the implicit MULX operand)
  # and %rax is zeroed up front; that `xor` also clears OF before the
  # single ADOX carry chain. T5 is scratch, and T4 is used as scratch
  # for the low half of the second product before it receives C4.
  my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5)=@_;
  # `.=` on a freshly declared lexical acts as a plain assignment here.
  my $body.=<<___;
  xor %rax, %rax
  mov $idxM0($M0), %rdx
  mulx 0+$M1, $T0, $T1 # T0 <- C0
  mulx 8+$M1, $T4, $T2
  mulx 16+$M1, $T5, $T3

  adox $T4, $T1 # T1 <- C1
  adox $T5, $T2 # T2 <- C2

  mulx 24+$M1, $T5, $T4
  adox $T5, $T3 # T3 <- C3
  adox %rax, $T4 # T4 <- C4
___
  return $body;
}

# Performs schoolbook multiplication of 128-bit with 256-bit
# number.
# Destroys RAX and RDX
sub mul128x256 {
  # Usage: mul128x256(OFF_A, BASE_A, MB, C0, C1, C2, C3, C4, C5, T0, T1)
  #
  # Returns assembly text that multiplies the two 64-bit words at
  # OFF_A(BASE_A) by the four-word number addressed by MB. MB is
  # spliced in as "0+MB" .. "24+MB", so it must accept a leading
  # displacement. The six-word product is accumulated column by column
  # into C0..C5; T0 and T1 hold the two words A0 and A1.
  #
  # Each accumulator is cleared with `xor` before its column is first
  # added to. Every `xor` sits before the add/adc sequence of its
  # partial product, so it never breaks a carry chain in progress.
  my ($idxMA,$MA,$MB,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1)=@_;
  my ($MA0,$MA8)=map("$idxMA+$_($MA)", (0,8));
  # `.=` on a freshly declared lexical acts as a plain assignment here.
  my $body.=<<___;
  # A0 x B0
  mov $MA0, $T0
  mov 0+$MB, %rax
  mul $T0
  xor $C2, $C2
  mov %rax, $C0 # c0
  mov %rdx, $C1

  # A0 x B1
  mov 8+$MB, %rax
  mul $T0
  xor $C3, $C3
  add %rax, $C1
  adc %rdx, $C2

  # A1 x B0
  mov $MA8, $T1
  mov 0+$MB, %rax
  mul $T1
  add %rax, $C1
  adc %rdx, $C2
  adc \$0x0, $C3

  # A0 x B2
  xor $C4, $C4
  mov 16+$MB, %rax
  mul $T0
  add %rax, $C2
  adc %rdx, $C3
  adc \$0x0, $C4

  # A1 x B1
  mov 8+$MB, %rax
  mul $T1
  add %rax, $C2 # c2
  adc %rdx, $C3
  adc \$0x0, $C4

  # A0 x B3
  mov 24+$MB, %rax
  mul $T0
  xor $C5, $C5
  add %rax, $C3
  adc %rdx, $C4
  adc \$0x0, $C5

  # A1 x B2
  mov 16+$MB, %rax
  mul $T1
  add %rax, $C3 # c3
  adc %rdx, $C4
  adc \$0x0, $C5

  # A1 x B3
  mov 24+$MB, %rax
  mul $T1
  add %rax, $C4
  adc %rdx, $C5

___
  return $body;
}

# Performs schoolbook multiplication of 128-bit with 256-bit
# number. Uses MULX, ADOX, ADCX instruction.
sub mulx128x256 {
  # Usage: mulx128x256(OFF, BASE, M1, T0, T1, T2, T3, T4, T5, T6)
  #
  # Same product as mul128x256, with the result C0..C5 left in T0..T5
  # and T6 used as scratch. %rdx carries the current multiplier word
  # (implicit MULX operand). %rax is zeroed before each row, which also
  # clears CF and OF.
  #
  # Row 0 (word at OFF+0) uses one ADOX chain. Row 1 (word at OFF+8)
  # adds the low halves of its products with ADCX (CF) and the high
  # halves with ADOX (OF), so the two chains run interleaved.
  my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5,$T6)=@_;
  my ($MUL0,$MUL8)=map("$idxM0+$_($M0)", (0,8));
  # `.=` on a freshly declared lexical acts as a plain assignment here.
  my $body.=<<___;
  xor %rax, %rax
  mov $MUL0, %rdx
  mulx 0+$M1, $T0, $T1 # T0 <- C0
  mulx 8+$M1, $T4, $T2
  mulx 16+$M1, $T5, $T3

  adox $T4, $T1 # T1: interm1
  adox $T5, $T2 # T2: interm2

  mulx 24+$M1, $T5, $T4
  adox $T5, $T3 # T3: interm3
  adox %rax, $T4 # T4: interm4

  xor %rax, %rax
  mov $MUL8, %rdx
  mulx 0+$M1, $T5, $T6
  adcx $T5, $T1 # T1 <- C1
  adcx $T6, $T2

  mulx 8+$M1, $T6, $T5
  adcx $T5, $T3
  adox $T6, $T2 # T2 <- C2

  mulx 16+$M1, $T6, $T5
  adcx $T5, $T4
  adox $T6, $T3 # T3 <- C3

  mulx 24+$M1, $T6, $T5
  adcx %rax, $T5
  adox $T6, $T4 # T4 <- C4
  adox %rax, $T5 # T5 <- C5
___
  return $body;
}

# Compute z = x + y (mod p).
+# Operation: c [rdx] = a [rdi] + b [rsi] +$code.=<<___; +.globl ${PREFIX}_fpadd +.type ${PREFIX}_fpadd,\@function,3 +${PREFIX}_fpadd: +.cfi_startproc + push %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + push %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + push %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + + xor %rax, %rax + + mov 0x0(%rdi), %r8 + add 0x0(%rsi), %r8 + mov 0x8(%rdi), %r9 + adc 0x8(%rsi), %r9 + mov 0x10(%rdi), %r10 + adc 0x10(%rsi), %r10 + mov 0x18(%rdi), %r11 + adc 0x18(%rsi), %r11 + mov 0x20(%rdi), %r12 + adc 0x20(%rsi), %r12 + mov 0x28(%rdi), %r13 + adc 0x28(%rsi), %r13 + mov 0x30(%rdi), %r14 + adc 0x30(%rsi), %r14 + + mov .Lp434x2(%rip), %rcx + sub %rcx, %r8 + mov 0x8+.Lp434x2(%rip), %rcx + sbb %rcx, %r9 + sbb %rcx, %r10 + mov 0x10+.Lp434x2(%rip), %rcx + sbb %rcx, %r11 + mov 0x18+.Lp434x2(%rip), %rcx + sbb %rcx, %r12 + mov 0x20+.Lp434x2(%rip), %rcx + sbb %rcx, %r13 + mov 0x28+.Lp434x2(%rip), %rcx + sbb %rcx, %r14 + + sbb \$0, %rax + + mov .Lp434x2(%rip), %rdi + and %rax, %rdi + mov 0x8+.Lp434x2(%rip), %rsi + and %rax, %rsi + mov 0x10+.Lp434x2(%rip), %rcx + and %rax, %rcx + + add %rdi, %r8 + mov %r8, 0x0(%rdx) + adc %rsi, %r9 + mov %r9, 0x8(%rdx) + adc %rsi, %r10 + mov %r10, 0x10(%rdx) + adc %rcx, %r11 + mov %r11, 0x18(%rdx) + + setc %cl + mov 0x18+.Lp434x2(%rip), %r8 + and %rax, %r8 + mov 0x20+.Lp434x2(%rip), %r9 + and %rax, %r9 + mov 0x28+.Lp434x2(%rip), %r10 + and %rax, %r10 + bt \$0, %rcx + + adc %r8, %r12 + mov %r12, 0x20(%rdx) + adc %r9, %r13 + mov %r13, 0x28(%rdx) + adc %r10, %r14 + mov %r14, 0x30(%rdx) + + pop %r14 +.cfi_adjust_cfa_offset -8 + pop %r13 +.cfi_adjust_cfa_offset -8 + pop %r12 +.cfi_adjust_cfa_offset -8 + ret +.cfi_endproc +___ + +# Loads data to XMM0 and XMM1 and +# conditionaly swaps depending on XMM3 +sub cswap_block16() { + my $idx = shift; + $idx *= 16; + (" + movdqu $idx(%rdi), %xmm0 + movdqu $idx(%rsi), %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm2 + pand %xmm3, %xmm2 + pxor %xmm2, %xmm0 + 
pxor %xmm2, %xmm1 + movdqu %xmm0, $idx(%rdi) + movdqu %xmm1, $idx(%rsi) + "); +} + +# Conditionally swaps bits in x and y in constant time. +# mask indicates bits to be swapped (set bits are swapped) +# Operation: [rdi] <-> [rsi] if rdx==1 +sub sike_cswap { + # P[0] with Q[0] + foreach ( 0.. 6){$BLOCKS.=eval "&cswap_block16($_)";} + # P[1] with Q[1] + foreach ( 7..13){$BLOCKS.=eval "&cswap_block16($_)";} + + my $body =<<___; +.globl ${PREFIX}_cswap_asm +.type ${PREFIX}_cswap_asm,\@function,3 +${PREFIX}_cswap_asm: + # Fill XMM3. After this step first half of XMM3 is + # just zeros and second half is whatever in RDX + mov %rdx, %xmm3 + + # Copy lower double word everywhere else. So that + # XMM3=RDX|RDX. As RDX has either all bits set + # or non result will be that XMM3 has also either + # all bits set or non of them. 68 = 01000100b + pshufd \$68, %xmm3, %xmm3 + $BLOCKS + ret +___ + ($body) +} +$code.=&sike_cswap(); + + +# Field subtraction +# Operation: c [rdx] = a [rdi] - b [rsi] +$code.=<<___; +.globl ${PREFIX}_fpsub +.type ${PREFIX}_fpsub,\@function,3 +${PREFIX}_fpsub: +.cfi_startproc + push %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + push %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + push %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + + xor %rax, %rax + + mov 0x0(%rdi), %r8 + sub 0x0(%rsi), %r8 + mov 0x8(%rdi), %r9 + sbb 0x8(%rsi), %r9 + mov 0x10(%rdi), %r10 + sbb 0x10(%rsi), %r10 + mov 0x18(%rdi), %r11 + sbb 0x18(%rsi), %r11 + mov 0x20(%rdi), %r12 + sbb 0x20(%rsi), %r12 + mov 0x28(%rdi), %r13 + sbb 0x28(%rsi), %r13 + mov 0x30(%rdi), %r14 + sbb 0x30(%rsi), %r14 + + sbb \$0x0, %rax + + mov .Lp434x2(%rip), %rdi + and %rax, %rdi + mov 0x08+.Lp434x2(%rip), %rsi + and %rax, %rsi + mov 0x10+.Lp434x2(%rip), %rcx + and %rax, %rcx + + add %rdi, %r8 + mov %r8, 0x0(%rdx) + adc %rsi, %r9 + mov %r9, 0x8(%rdx) + adc %rsi, %r10 + mov %r10, 0x10(%rdx) + adc %rcx, %r11 + mov %r11, 0x18(%rdx) + + setc %cl + mov 0x18+.Lp434x2(%rip), %r8 + and %rax, %r8 
+ mov 0x20+.Lp434x2(%rip), %r9 + and %rax, %r9 + mov 0x28+.Lp434x2(%rip), %r10 + and %rax, %r10 + bt \$0x0, %rcx + + adc %r8, %r12 + adc %r9, %r13 + adc %r10, %r14 + mov %r12, 0x20(%rdx) + mov %r13, 0x28(%rdx) + mov %r14, 0x30(%rdx) + + pop %r14 +.cfi_adjust_cfa_offset -8 + pop %r13 +.cfi_adjust_cfa_offset -8 + pop %r12 +.cfi_adjust_cfa_offset -8 + ret +.cfi_endproc +___ + +# 434-bit multiprecision addition +# Operation: c [rdx] = a [rdi] + b [rsi] +$code.=<<___; +.globl ${PREFIX}_mpadd_asm +.type ${PREFIX}_mpadd_asm,\@function,3 +${PREFIX}_mpadd_asm: +.cfi_startproc + mov 0x0(%rdi), %r8; + mov 0x8(%rdi), %r9 + mov 0x10(%rdi), %r10 + mov 0x18(%rdi), %r11 + mov 0x20(%rdi), %rcx + add 0x0(%rsi), %r8 + adc 0x8(%rsi), %r9 + adc 0x10(%rsi), %r10 + adc 0x18(%rsi), %r11 + adc 0x20(%rsi), %rcx + mov %r8, 0x0(%rdx) + mov %r9, 0x8(%rdx) + mov %r10, 0x10(%rdx) + mov %r11, 0x18(%rdx) + mov %rcx, 0x20(%rdx) + + mov 0x28(%rdi), %r8 + mov 0x30(%rdi), %r9 + adc 0x28(%rsi), %r8 + adc 0x30(%rsi), %r9 + mov %r8, 0x28(%rdx) + mov %r9, 0x30(%rdx) + ret +.cfi_endproc +___ + +# 2x434-bit multiprecision subtraction +# Operation: c [rdx] = a [rdi] - b [rsi]. 
+# Returns borrow mask +$code.=<<___; +.globl ${PREFIX}_mpsubx2_asm +.type ${PREFIX}_mpsubx2_asm,\@function,3 +${PREFIX}_mpsubx2_asm: +.cfi_startproc + xor %rax, %rax + + mov 0x0(%rdi), %r8 + mov 0x8(%rdi), %r9 + mov 0x10(%rdi), %r10 + mov 0x18(%rdi), %r11 + mov 0x20(%rdi), %rcx + sub 0x0(%rsi), %r8 + sbb 0x8(%rsi), %r9 + sbb 0x10(%rsi), %r10 + sbb 0x18(%rsi), %r11 + sbb 0x20(%rsi), %rcx + mov %r8, 0x0(%rdx) + mov %r9, 0x8(%rdx) + mov %r10, 0x10(%rdx) + mov %r11, 0x18(%rdx) + mov %rcx, 0x20(%rdx) + + mov 0x28(%rdi), %r8 + mov 0x30(%rdi), %r9 + mov 0x38(%rdi), %r10 + mov 0x40(%rdi), %r11 + mov 0x48(%rdi), %rcx + sbb 0x28(%rsi), %r8 + sbb 0x30(%rsi), %r9 + sbb 0x38(%rsi), %r10 + sbb 0x40(%rsi), %r11 + sbb 0x48(%rsi), %rcx + mov %r8, 0x28(%rdx) + mov %r9, 0x30(%rdx) + mov %r10, 0x38(%rdx) + mov %r11, 0x40(%rdx) + mov %rcx, 0x48(%rdx) + + mov 0x50(%rdi), %r8 + mov 0x58(%rdi), %r9 + mov 0x60(%rdi), %r10 + mov 0x68(%rdi), %r11 + sbb 0x50(%rsi), %r8 + sbb 0x58(%rsi), %r9 + sbb 0x60(%rsi), %r10 + sbb 0x68(%rsi), %r11 + sbb \$0x0, %rax + mov %r8, 0x50(%rdx) + mov %r9, 0x58(%rdx) + mov %r10, 0x60(%rdx) + mov %r11, 0x68(%rdx) + ret +.cfi_endproc +___ + + +# Double 2x434-bit multiprecision subtraction +# Operation: c [rdx] = c [rdx] - a [rdi] - b [rsi] +$code.=<<___; +.globl ${PREFIX}_mpdblsubx2_asm +.type ${PREFIX}_mpdblsubx2_asm,\@function,3 +${PREFIX}_mpdblsubx2_asm: +.cfi_startproc + push %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + push %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + + xor %rax, %rax + + # ci:low = c:low - a:low + mov 0x0(%rdx), %r8 + mov 0x8(%rdx), %r9 + mov 0x10(%rdx), %r10 + mov 0x18(%rdx), %r11 + mov 0x20(%rdx), %r12 + mov 0x28(%rdx), %r13 + mov 0x30(%rdx), %rcx + sub 0x0(%rdi), %r8 + sbb 0x8(%rdi), %r9 + sbb 0x10(%rdi), %r10 + sbb 0x18(%rdi), %r11 + sbb 0x20(%rdi), %r12 + sbb 0x28(%rdi), %r13 + sbb 0x30(%rdi), %rcx + adc \$0x0, %rax + + # c:low = ci:low - b:low + sub 0x0(%rsi), %r8 + sbb 0x8(%rsi), %r9 + sbb 0x10(%rsi), %r10 + sbb 
0x18(%rsi), %r11 + sbb 0x20(%rsi), %r12 + sbb 0x28(%rsi), %r13 + sbb 0x30(%rsi), %rcx + adc \$0x0, %rax + + # store c:low + mov %r8, 0x0(%rdx) + mov %r9, 0x8(%rdx) + mov %r10, 0x10(%rdx) + mov %r11, 0x18(%rdx) + mov %r12, 0x20(%rdx) + mov %r13, 0x28(%rdx) + mov %rcx, 0x30(%rdx) + + # ci:high = c:high - a:high + mov 0x38(%rdx), %r8 + mov 0x40(%rdx), %r9 + mov 0x48(%rdx), %r10 + mov 0x50(%rdx), %r11 + mov 0x58(%rdx), %r12 + mov 0x60(%rdx), %r13 + mov 0x68(%rdx), %rcx + + sub %rax, %r8 + sbb 0x38(%rdi), %r8 + sbb 0x40(%rdi), %r9 + sbb 0x48(%rdi), %r10 + sbb 0x50(%rdi), %r11 + sbb 0x58(%rdi), %r12 + sbb 0x60(%rdi), %r13 + sbb 0x68(%rdi), %rcx + + # c:high = ci:high - b:high + sub 0x38(%rsi), %r8 + sbb 0x40(%rsi), %r9 + sbb 0x48(%rsi), %r10 + sbb 0x50(%rsi), %r11 + sbb 0x58(%rsi), %r12 + sbb 0x60(%rsi), %r13 + sbb 0x68(%rsi), %rcx + + # store c:high + mov %r8, 0x38(%rdx) + mov %r9, 0x40(%rdx) + mov %r10, 0x48(%rdx) + mov %r11, 0x50(%rdx) + mov %r12, 0x58(%rdx) + mov %r13, 0x60(%rdx) + mov %rcx, 0x68(%rdx) + + pop %r13 +.cfi_adjust_cfa_offset -8 + pop %r12 +.cfi_adjust_cfa_offset -8 + ret +.cfi_endproc + +___ + +sub redc_common { + my ($mul01, $mul23, $mul45, $mul67)=@_; + my $body=<<___; + $mul01 + xor %rcx, %rcx + add 0x18(%rdi), %r8 + adc 0x20(%rdi), %r9 + adc 0x28(%rdi), %r10 + adc 0x30(%rdi), %r11 + adc 0x38(%rdi), %r12 + adc 0x40(%rdi), %r13 + adc 0x48(%rdi), %rcx + mov %r8, 0x18(%rdi) + mov %r9, 0x20(%rdi) + mov %r10, 0x28(%rdi) + mov %r11, 0x30(%rdi) + mov %r12, 0x38(%rdi) + mov %r13, 0x40(%rdi) + mov %rcx, 0x48(%rdi) + mov 0x50(%rdi), %r8 + mov 0x58(%rdi), %r9 + mov 0x60(%rdi), %r10 + mov 0x68(%rdi), %r11 + adc \$0x0, %r8 + adc \$0x0, %r9 + adc \$0x0, %r10 + adc \$0x0, %r11 + mov %r8, 0x50(%rdi) + mov %r9, 0x58(%rdi) + mov %r10, 0x60(%rdi) + mov %r11, 0x68(%rdi) + + $mul23 + xor %rcx, %rcx + add 0x28(%rdi), %r8 + adc 0x30(%rdi), %r9 + adc 0x38(%rdi), %r10 + adc 0x40(%rdi), %r11 + adc 0x48(%rdi), %r12 + adc 0x50(%rdi), %r13 + adc 0x58(%rdi), %rcx + mov %r8, 
0x28(%rdi) + mov %r9, 0x30(%rdi) + mov %r10, 0x38(%rdi) + mov %r11, 0x40(%rdi) + mov %r12, 0x48(%rdi) + mov %r13, 0x50(%rdi) + mov %rcx, 0x58(%rdi) + mov 0x60(%rdi), %r8 + mov 0x68(%rdi), %r9 + adc \$0x0, %r8 + adc \$0x0, %r9 + mov %r8, 0x60(%rdi) + mov %r9, 0x68(%rdi) + + $mul45 + xor %rcx, %rcx + add 0x38(%rdi), %r8 + adc 0x40(%rdi), %r9 + adc 0x48(%rdi), %r10 + adc 0x50(%rdi), %r11 + adc 0x58(%rdi), %r12 + adc 0x60(%rdi), %r13 + adc 0x68(%rdi), %rcx + mov %r8, 0x0(%rsi) # C0 + mov %r9, 0x8(%rsi) # C1 + mov %r10, 0x48(%rdi) + mov %r11, 0x50(%rdi) + mov %r12, 0x58(%rdi) + mov %r13, 0x60(%rdi) + mov %rcx, 0x68(%rdi) + + $mul67 + add 0x48(%rdi), %r8 + adc 0x50(%rdi), %r9 + adc 0x58(%rdi), %r10 + adc 0x60(%rdi), %r11 + adc 0x68(%rdi), %r12 + mov %r8, 0x10(%rsi) # C2 + mov %r9, 0x18(%rsi) # C3 + mov %r10, 0x20(%rsi) # C4 + mov %r11, 0x28(%rsi) # C5 + mov %r12, 0x30(%rsi) # C6 +___ + return $body; +} + +# Optimized Montgomery reduction for CPUs, based on method described +# in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015. 
# Operation: c [rsi] = a [rdi]
# NOTE: a=c is not allowed
sub sike_rdc {
  # Returns the assembly text for two entry points:
  #   .Lrdc_bdw        - MULX/ADCX/ADOX variant; a local label that is
  #                      only reached through the `je` produced by
  #                      alt_impl inside ${PREFIX}_fprdc, i.e. after
  #                      that function has pushed r12-r15. It therefore
  #                      pops those four registers itself.
  #   ${PREFIX}_fprdc  - public symbol; saves r12-r15, dispatches to
  #                      .Lrdc_bdw when the CPU check passes, otherwise
  #                      falls through to the MUL-based body.
  # Both bodies are the same redc_common skeleton instantiated with a
  # different set of multiplication templates.
  #
  # The original code used `my $x = ... if ($bmi2_adx);`. perlsyn
  # documents `my` with a statement modifier as undefined behaviour, so
  # the optional pieces are built with a conditional expression
  # instead. With $bmi2_adx unset they are empty strings.
  my $jump_redc_bdw = $bmi2_adx ? &alt_impl(".Lrdc_bdw") : "";
  # a[0-1] x .Lp434p1 --> result: r8:r13
  my $mulx1=&mulx128x256( 0,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
  # a[2-3] x .Lp434p1 --> result: r8:r13
  my $mulx2=&mulx128x256(16,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
  # a[4-5] x .Lp434p1 --> result: r8:r13
  my $mulx3=&mulx128x256(32,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
  # a[6] x .Lp434p1 --> result: r8:r12 (r13 is scratch)
  my $mulx4=&mulx64x256( 48,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)));

  # a[0-1] x .Lp434p1 --> result: r8:r13
  my $mul1=&mul128x256( 0,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
  # a[2-3] x .Lp434p1 --> result: r8:r13
  my $mul2=&mul128x256(16,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
  # a[4-5] x .Lp434p1 --> result: r8:r13
  my $mul3=&mul128x256(32,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
  # a[6] x .Lp434p1 --> result: r8:r12 (r13 holds the multiplier)
  my $mul4=&mul64x256( 48,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)));

  my $redc_mul=&redc_common($mul1, $mul2, $mul3, $mul4);
  my $redc_bdw = $bmi2_adx ? &redc_common($mulx1, $mulx2, $mulx3, $mulx4) : "";

  # REDC for Broadwell CPUs
  my $code=<<___;
  .Lrdc_bdw:
  .cfi_startproc
  # sike_fprdc has already pushed r12--15 by this point.
  .cfi_adjust_cfa_offset 32
  .cfi_offset r12, -16
  .cfi_offset r13, -24
  .cfi_offset r14, -32
  .cfi_offset r15, -40

  $redc_bdw

  pop %r15
  .cfi_adjust_cfa_offset -8
  .cfi_same_value r15
  pop %r14
  .cfi_adjust_cfa_offset -8
  .cfi_same_value r14
  pop %r13
  .cfi_adjust_cfa_offset -8
  .cfi_same_value r13
  pop %r12
  .cfi_adjust_cfa_offset -8
  .cfi_same_value r12
  ret
  .cfi_endproc
___

  # REDC for CPUs older than Broadwell
  $code.=<<___;
  .globl ${PREFIX}_fprdc
  .type ${PREFIX}_fprdc,\@function,3
  ${PREFIX}_fprdc:
  .cfi_startproc
  push %r12
  .cfi_adjust_cfa_offset 8
  .cfi_offset r12, -16
  push %r13
  .cfi_adjust_cfa_offset 8
  .cfi_offset r13, -24
  push %r14
  .cfi_adjust_cfa_offset 8
  .cfi_offset r14, -32
  push %r15
  .cfi_adjust_cfa_offset 8
  .cfi_offset r15, -40

  # Jump to optimized implementation if
  # CPU supports ADCX/ADOX/MULX
  $jump_redc_bdw
  # Otherwise use generic implementation
  $redc_mul

  pop %r15
  .cfi_adjust_cfa_offset -8
  pop %r14
  .cfi_adjust_cfa_offset -8
  pop %r13
  .cfi_adjust_cfa_offset -8
  pop %r12
  .cfi_adjust_cfa_offset -8
  ret
  .cfi_endproc
___
  return $code;
}
$code.=&sike_rdc();

# 434-bit multiplication using Karatsuba (one level),
# schoolbook (one level). Uses MULX/ADOX/ADCX instructions
# available on Broadwell micro-architectures and newer.
sub mul_bdw {
  # Returns the body (no label, no `ret`) of the MULX/ADCX/ADOX
  # multiplication c = a * b with a in [rdi], b in [rsi] and c in [rdx]
  # on entry. The c pointer is moved to %rcx first because MULX needs
  # %rdx.
  #
  # One Karatsuba level: A = AL (words 0-3) + AH (words 4-6), likewise
  # B. A 96-byte stack frame is used as follows:
  #   0x00-0x1f  AH+AL, later the low half of (AH+AL)x(BH+BL)
  #   0x20-0x3f  BH+BL, later the high half of (AH+AL)x(BH+BL)
  #   0x40-0x5f  the cross term built from the two overflow masks
  # %rax / %rbx become all-ones masks when AH+AL / BH+BL overflow
  # 256 bits; each mask selects the *other* operand's sum.
  #
  # rbx and rbp are saved and restored here. The CFI offsets -48/-56
  # assume four earlier pushes, i.e. the caller is expected to have
  # saved r12-r15 already.
  #
  # [rsp] <- (AH+AL) x (BH+BL)
  my $mul256_low=&mul256(0,"%rsp",32,"%rsp",0,"%rsp",map("%r$_",(8..15)),"%rbx","%rbp");
  # [rcx] <- AL x BL
  my $mul256_albl=&mul256(0,"%rdi",0,"%rsi",0,"%rcx",map("%r$_",(8..15)),"%rbx","%rbp");
  # [rcx+64] <- AH x BH
  my $mul192_ahbh=&mul192(32,"%rdi",32,"%rsi",64,"%rcx",map("%r$_",(8..14)));

  # Lexical, so that the template does not leak into (or get clobbered
  # through) a package-global $body shared with other generators.
  my $body=<<___;

  mov %rdx, %rcx
  xor %rax, %rax

  # r8-r11 <- AH + AL, rax <- mask
  mov 0x0(%rdi), %r8
  mov 0x8(%rdi), %r9
  mov 0x10(%rdi), %r10
  mov 0x18(%rdi), %r11

  push %rbx
  .cfi_adjust_cfa_offset 8
  .cfi_offset rbx, -48
  push %rbp
  .cfi_offset rbp, -56
  .cfi_adjust_cfa_offset 8
  sub \$96, %rsp
  .cfi_adjust_cfa_offset 96

  add 0x20(%rdi), %r8
  adc 0x28(%rdi), %r9
  adc 0x30(%rdi), %r10
  adc \$0x0, %r11
  sbb \$0x0, %rax
  mov %r8, 0x0(%rsp)
  mov %r9, 0x8(%rsp)
  mov %r10, 0x10(%rsp)
  mov %r11, 0x18(%rsp)

  # r12-r15 <- BH + BL, rbx <- mask
  xor %rbx, %rbx
  mov 0x0(%rsi), %r12
  mov 0x8(%rsi), %r13
  mov 0x10(%rsi), %r14
  mov 0x18(%rsi), %r15
  add 0x20(%rsi), %r12
  adc 0x28(%rsi), %r13
  adc 0x30(%rsi), %r14
  adc \$0x0, %r15
  sbb \$0x0, %rbx
  mov %r12, 0x20(%rsp)
  mov %r13, 0x28(%rsp)
  mov %r14, 0x30(%rsp)
  mov %r15, 0x38(%rsp)

  # r12-r15 <- masked (BH + BL)
  and %rax, %r12
  and %rax, %r13
  and %rax, %r14
  and %rax, %r15

  # r8-r11 <- masked (AH + AL)
  and %rbx, %r8
  and %rbx, %r9
  and %rbx, %r10
  and %rbx, %r11

  # r8-r11 <- masked (AH + AL) + masked (BH + BL)
  add %r12, %r8
  adc %r13, %r9
  adc %r14, %r10
  adc %r15, %r11
  mov %r8, 0x40(%rsp)
  mov %r9, 0x48(%rsp)
  mov %r10, 0x50(%rsp)
  mov %r11, 0x58(%rsp)

  # [rsp] <- CM = (AH+AL) x (BH+BL)
  $mul256_low
  # [rcx] <- CL = AL x BL (Result c0-c3)
  $mul256_albl
  # [rcx+64] <- CH = AH x BH
  $mul192_ahbh

  # r8-r11 <- (AH+AL) x (BH+BL), final step
  mov 0x40(%rsp), %r8
  mov 0x48(%rsp), %r9
  mov 0x50(%rsp), %r10
  mov 0x58(%rsp), %r11

  mov 0x20(%rsp), %rax
  add %rax, %r8
  mov 0x28(%rsp), %rax
  adc %rax, %r9
  mov 0x30(%rsp), %rax
  adc %rax, %r10
  mov 0x38(%rsp), %rax
  adc %rax, %r11

  # r12-r15 (low), r8-r11 (high) <- (AH+AL) x (BH+BL) - ALxBL
  mov 0x0(%rsp), %r12
  mov 0x8(%rsp), %r13
  mov 0x10(%rsp), %r14
  mov 0x18(%rsp), %r15
  sub 0x0(%rcx), %r12
  sbb 0x8(%rcx), %r13
  sbb 0x10(%rcx), %r14
  sbb 0x18(%rcx), %r15
  sbb 0x20(%rcx), %r8
  sbb 0x28(%rcx), %r9
  sbb 0x30(%rcx), %r10
  sbb 0x38(%rcx), %r11

  # r12-r15 (low), r8-r11 (high) <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
  sub 0x40(%rcx), %r12
  sbb 0x48(%rcx), %r13
  sbb 0x50(%rcx), %r14
  sbb 0x58(%rcx), %r15
  sbb 0x60(%rcx), %r8
  sbb 0x68(%rcx), %r9
  sbb \$0x0, %r10
  sbb \$0x0, %r11

  add 0x20(%rcx), %r12
  mov %r12, 0x20(%rcx) # Result C4-C7
  adc 0x28(%rcx), %r13
  mov %r13, 0x28(%rcx)
  adc 0x30(%rcx), %r14
  mov %r14, 0x30(%rcx)
  adc 0x38(%rcx), %r15
  mov %r15, 0x38(%rcx)
  adc 0x40(%rcx), %r8
  mov %r8, 0x40(%rcx) # Result C8-C13
  adc 0x48(%rcx), %r9
  mov %r9, 0x48(%rcx)
  adc 0x50(%rcx), %r10
  mov %r10, 0x50(%rcx)
  adc 0x58(%rcx), %r11
  mov %r11, 0x58(%rcx)
  mov 0x60(%rcx), %r12
  adc \$0x0, %r12
  mov %r12, 0x60(%rcx)
  mov 0x68(%rcx), %r13
  adc \$0x0, %r13
  mov %r13, 0x68(%rcx)

  add \$96, %rsp
  .cfi_adjust_cfa_offset -96
  pop %rbp
  .cfi_adjust_cfa_offset -8
  .cfi_same_value rbp
  pop %rbx
  .cfi_adjust_cfa_offset -8
  .cfi_same_value rbx
___
  return $body;
}

# 434-bit multiplication using Karatsuba (one level),
# schoolbook (one level).
+sub mul { + my $code=<<___; + mov %rdx, %rcx + + sub \$112, %rsp # Allocating space in stack + .cfi_adjust_cfa_offset 112 + + # rcx[0-3] <- AH+AL + xor %rax, %rax + mov 0x20(%rdi), %r8 + mov 0x28(%rdi), %r9 + mov 0x30(%rdi), %r10 + xor %r11, %r11 + add 0x0(%rdi), %r8 + adc 0x8(%rdi), %r9 + adc 0x10(%rdi), %r10 + adc 0x18(%rdi), %r11 + # store AH+AL mask + sbb \$0, %rax + mov %rax, 0x40(%rsp) + # store AH+AL in 0-0x18(rcx) + mov %r8, 0x0(%rcx) + mov %r9, 0x8(%rcx) + mov %r10, 0x10(%rcx) + mov %r11, 0x18(%rcx) + + # r12-r15 <- BH+BL + xor %rdx, %rdx + mov 0x20(%rsi), %r12 + mov 0x28(%rsi), %r13 + mov 0x30(%rsi), %r14 + xor %r15, %r15 + add 0x0(%rsi), %r12 + adc 0x8(%rsi), %r13 + adc 0x10(%rsi), %r14 + adc 0x18(%rsi), %r15 + sbb \$0x0, %rdx + # store BH+BL mask + mov %rdx, 0x48(%rsp) + + # (rsp[0-0x38]) <- (AH+AL)*(BH+BL) + mov (%rcx), %rax + mul %r12 + mov %rax, (%rsp) # c0 + mov %rdx, %r8 + + xor %r9, %r9 + mov (%rcx), %rax + mul %r13 + add %rax, %r8 + adc %rdx, %r9 + + xor %r10, %r10 + mov 0x8(%rcx), %rax + mul %r12 + add %rax, %r8 + mov %r8, 0x8(%rsp) # c1 + adc %rdx, %r9 + adc \$0x0,%r10 + + xor %r8, %r8 + mov (%rcx), %rax + mul %r14 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x10(%rcx), %rax + mul %r12 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x8(%rcx), %rax + mul %r13 + add %rax, %r9 + mov %r9, 0x10(%rsp) # c2 + adc %rdx, %r10 + adc \$0x0, %r8 + + xor %r9, %r9 + mov (%rcx),%rax + mul %r15 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov 0x18(%rcx), %rax + mul %r12 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov 0x8(%rcx), %rax + mul %r14 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov 0x10(%rcx), %rax + mul %r13 + add %rax, %r10 + mov %r10, 0x18(%rsp) # c3 + adc %rdx, %r8 + adc \$0x0, %r9 + + xor %r10, %r10 + mov 0x8(%rcx), %rax + mul %r15 + add %rax, %r8 + adc %rdx, %r9 + adc \$0x0,%r10 + + mov 0x18(%rcx), %rax + mul %r13 + add %rax, %r8 + adc %rdx, %r9 + adc \$0x0,%r10 + + mov 0x10(%rcx), %rax + mul 
%r14 + add %rax, %r8 # c4 + mov %r8, 0x20(%rsp) + adc %rdx, %r9 + adc \$0x0,%r10 + + xor %r11, %r11 + mov 0x10(%rcx), %rax + mul %r15 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r11 + + mov 0x18(%rcx), %rax + mul %r14 + add %rax, %r9 # c5 + mov %r9, 0x28(%rsp) + adc %rdx, %r10 + adc \$0x0,%r11 + + mov 0x18(%rcx), %rax + mul %r15 + add %rax, %r10 # c6 + mov %r10, 0x30(%rsp) + adc %rdx, %r11 # c7 + mov %r11, 0x38(%rsp) + + # r12-r15 <- masked (BH + BL) + mov 0x40(%rsp), %rax + and %rax, %r12 + and %rax, %r13 + and %rax, %r14 + and %rax, %r15 + + # r8-r11 <- masked (AH + AL) + mov 0x48(%rsp),%rax + mov 0x00(rcx), %r8 + and %rax, %r8 + mov 0x08(rcx), %r9 + and %rax, %r9 + mov 0x10(rcx), %r10 + and %rax, %r10 + mov 0x18(rcx), %r11 + and %rax, %r11 + + # r12-r15 <- masked (AH + AL) + masked (BH + BL) + add %r8, %r12 + adc %r9, %r13 + adc %r10, %r14 + adc %r11, %r15 + + # rsp[0x20-0x38] <- (AH+AL) x (BH+BL) high + mov 0x20(%rsp), %rax + add %rax, %r12 + mov 0x28(%rsp), %rax + adc %rax, %r13 + mov 0x30(%rsp), %rax + adc %rax, %r14 + mov 0x38(%rsp), %rax + adc %rax, %r15 + mov %r12, 0x50(%rsp) + mov %r13, 0x58(%rsp) + mov %r14, 0x60(%rsp) + mov %r15, 0x68(%rsp) + + # [rcx] <- CL = AL x BL + mov (%rdi), %r11 + mov (%rsi), %rax + mul %r11 + xor %r9, %r9 + mov %rax, (%rcx) # c0 + mov %rdx, %r8 + + mov 0x10(%rdi), %r14 + mov 0x8(%rsi), %rax + mul %r11 + xor %r10, %r10 + add %rax, %r8 + adc %rdx, %r9 + + mov 0x8(%rdi), %r12 + mov (%rsi), %rax + mul %r12 + add %rax, %r8 + mov %r8, 0x8(%rcx) # c1 + adc %rdx, %r9 + adc \$0x0,%r10 + + xor %r8, %r8 + mov 0x10(%rsi), %rax + mul %r11 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov (%rsi),%r13 + mov %r14, %rax + mul %r13 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x8(%rsi), %rax + mul %r12 + add %rax, %r9 + mov %r9, 0x10(%rcx) # c2 + adc %rdx, %r10 + adc \$0x0,%r8 + + xor %r9, %r9 + mov 0x18(%rsi), %rax + mul %r11 + mov 0x18(%rdi), %r15 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov %r15, %rax + mul 
%r13 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov 0x10(%rsi), %rax + mul %r12 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r9 + + mov 0x8(%rsi), %rax + mul %r14 + add %rax, %r10 + mov %r10, 0x18(%rcx) # c3 + adc %rdx, %r8 + adc \$0x0,%r9 + + xor %r10, %r10 + mov 0x18(%rsi), %rax + mul %r12 + add %rax, %r8 + adc %rdx, %r9 + adc \$0x0,%r10 + + mov 0x8(%rsi), %rax + mul %r15 + add %rax, %r8 + adc %rdx, %r9 + adc \$0x0,%r10 + + mov 0x10(%rsi), %rax + mul %r14 + add %rax, %r8 + mov %r8, 0x20(%rcx) # c4 + adc %rdx, %r9 + adc \$0x0,%r10 + + xor %r8, %r8 + mov 0x18(%rsi), %rax + mul %r14 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x10(%rsi), %rax + mul %r15 + add %rax, %r9 + mov %r9, 0x28(%rcx) # c5 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x18(%rsi), %rax + mul %r15 + add %rax, %r10 + mov %r10, 0x30(%rcx) # c6 + adc %rdx, %r8 + mov %r8, 0x38(%rcx) # c7 + + # rcx[0x40-0x68] <- AH*BH + # multiplies 2 192-bit numbers A,B + mov 0x20(%rdi), %r11 + mov 0x20(%rsi), %rax + mul %r11 + xor %r9, %r9 + mov %rax, 0x40(%rcx) # c0 + mov %rdx, %r8 + + mov 0x30(%rdi), %r14 + mov 0x28(%rsi), %rax + mul %r11 + xor %r10, %r10 + add %rax, %r8 + adc %rdx, %r9 + + mov 0x28(%rdi), %r12 + mov 0x20(%rsi), %rax + mul %r12 + add %rax, %r8 + mov %r8, 0x48(%rcx) # c1 + adc %rdx, %r9 + adc \$0x0,%r10 + + xor %r8, %r8 + mov 0x30(%rsi), %rax + mul %r11 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x20(%rsi), %r13 + mov %r14, %rax + mul %r13 + add %rax, %r9 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x28(%rsi), %rax + mul %r12 + add %rax, %r9 + mov %r9, 0x50(%rcx) # c2 + adc %rdx, %r10 + adc \$0x0,%r8 + + mov 0x30(%rsi), %rax + mul %r12 + xor %r12, %r12 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r12 + + mov 0x28(%rsi), %rax + mul %r14 + add %rax, %r10 + adc %rdx, %r8 + adc \$0x0,%r12 + mov %r10, 0x58(%rcx) # c3 + + mov 0x30(%rsi), %rax + mul %r14 + add %rax, %r8 + adc \$0x0,%r12 + mov %r8, 0x60(%rcx) # c4 + + add %r12, %rdx # c5 + + # [r8-r15] <- (AH+AL)x(BH+BL) - 
ALxBL + mov 0x0(%rsp), %r8 + sub 0x0(%rcx), %r8 + mov 0x8(%rsp), %r9 + sbb 0x8(%rcx), %r9 + mov 0x10(%rsp), %r10 + sbb 0x10(%rcx), %r10 + mov 0x18(%rsp), %r11 + sbb 0x18(%rcx), %r11 + mov 0x50(%rsp), %r12 + sbb 0x20(%rcx), %r12 + mov 0x58(%rsp), %r13 + sbb 0x28(%rcx), %r13 + mov 0x60(%rsp), %r14 + sbb 0x30(%rcx), %r14 + mov 0x68(%rsp), %r15 + sbb 0x38(%rcx), %r15 + + # [r8-r15] <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + mov 0x40(%rcx), %rax + sub %rax, %r8 + mov 0x48(%rcx), %rax + sbb %rax, %r9 + mov 0x50(%rcx), %rax + sbb %rax, %r10 + mov 0x58(%rcx), %rax + sbb %rax, %r11 + mov 0x60(%rcx), %rax + sbb %rax, %r12 + sbb %rdx, %r13 + sbb \$0x0,%r14 + sbb \$0x0,%r15 + + # Final result + add 0x20(%rcx), %r8 + mov %r8, 0x20(%rcx) # Result C4-C7 + adc 0x28(%rcx), %r9 + mov %r9, 0x28(%rcx) + adc 0x30(%rcx), %r10 + mov %r10, 0x30(%rcx) + adc 0x38(%rcx), %r11 + mov %r11, 0x38(%rcx) + adc 0x40(%rcx), %r12 + mov %r12, 0x40(%rcx) # Result C8-C13 + adc 0x48(%rcx), %r13 + mov %r13, 0x48(%rcx) + adc 0x50(%rcx), %r14 + mov %r14, 0x50(%rcx) + adc 0x58(%rcx), %r15 + mov %r15, 0x58(%rcx) + mov 0x60(%rcx), %r12 + adc \$0x0, %r12 + mov %r12, 0x60(%rcx) + adc \$0x0, %rdx + mov %rdx, 0x68(%rcx) + + add \$112, %rsp # Restoring space in stack + .cfi_adjust_cfa_offset -112 +___ + + return $code; +} + +# Integer multiplication based on Karatsuba method +# Operation: c [rdx] = a [rdi] * b [rsi] +# NOTE: a=c or b=c are not allowed +sub sike_mul { + my $jump_mul_bdw=&alt_impl(".Lmul_bdw") if ($bmi2_adx); + # MUL for Broadwell CPUs + my $mul_bdw=&mul_bdw() if ($bmi2_adx); + # MUL for CPUs older than Broadwell + my $mul=&mul(); + + my $body=<<___; + .Lmul_bdw: + .cfi_startproc + # sike_mpmul has already pushed r12--15 by this point. 
+ .cfi_adjust_cfa_offset 32 + .cfi_offset r12, -16 + .cfi_offset r13, -24 + .cfi_offset r14, -32 + .cfi_offset r15, -40 + + $mul_bdw + + pop %r15 + .cfi_adjust_cfa_offset -8 + .cfi_same_value r15 + pop %r14 + .cfi_adjust_cfa_offset -8 + .cfi_same_value r14 + pop %r13 + .cfi_adjust_cfa_offset -8 + .cfi_same_value r13 + pop %r12 + .cfi_adjust_cfa_offset -8 + .cfi_same_value r12 + ret + .cfi_endproc + + .globl ${PREFIX}_mpmul + .type ${PREFIX}_mpmul,\@function,3 + ${PREFIX}_mpmul: + .cfi_startproc + push %r12 + .cfi_adjust_cfa_offset 8 + .cfi_offset r12, -16 + push %r13 + .cfi_adjust_cfa_offset 8 + .cfi_offset r13, -24 + push %r14 + .cfi_adjust_cfa_offset 8 + .cfi_offset r14, -32 + push %r15 + .cfi_adjust_cfa_offset 8 + .cfi_offset r15, -40 + + # Jump to optimized implementation if + # CPU supports ADCX/ADOX/MULX + $jump_mul_bdw + # Otherwise use generic implementation + $mul + + pop %r15 + .cfi_adjust_cfa_offset -8 + pop %r14 + .cfi_adjust_cfa_offset -8 + pop %r13 + .cfi_adjust_cfa_offset -8 + pop %r12 + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + +___ + return $body; +} + +$code.=&sike_mul(); + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + print $_,"\n"; +} + +close STDOUT; diff --git a/src/third_party/sike/asm/fp_generic.c b/src/third_party/sike/asm/fp_generic.c new file mode 100644 index 00000000..991ad1e1 --- /dev/null +++ b/src/third_party/sike/asm/fp_generic.c @@ -0,0 +1,181 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: portable modular arithmetic for P434 +*********************************************************************************************/ + +#include <openssl/base.h> + +#if defined(OPENSSL_NO_ASM) || \ + (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + +#include "../utils.h" +#include "../fpx.h" + +// Global constants +extern const struct params_t sike_params; + +static void
digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + crypto_word_t al, ah, bl, bh, temp; + crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4); + crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(crypto_word_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(crypto_word_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(crypto_word_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(crypto_word_t) * 4); + c[0] ^= temp << (sizeof(crypto_word_t) * 4); // C01 + + res1 = ahbl >> (sizeof(crypto_word_t) * 4); + res2 = albh >> (sizeof(crypto_word_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + +void sike_fpadd(const felm_t a, const felm_t b, felm_t c) +{ // Modular addition, c = a+b mod p434. + // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, carry = 0; + crypto_word_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], sike_params.prime_x2[i], carry, c[i]); + } + mask = 0 - (crypto_word_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], sike_params.prime_x2[i] & mask, carry, c[i]); + } +} + +void sike_fpsub(const felm_t a, const felm_t b, felm_t c) +{ // Modular subtraction, c = a-b mod p434. 
+ // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, borrow = 0; + crypto_word_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (crypto_word_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], sike_params.prime_x2[i] & mask, borrow, c[i]); + } +} + +void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. + unsigned int i, j; + crypto_word_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*NWORDS_FIELD-1] = v; +} + +void sike_fprdc(felm_t ma, felm_t mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. + // mc = ma*R^-1 mod p434x2, where R = 2^448. + // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = ZERO_WORDS; + crypto_word_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-ZERO_WORDS+1)) { + MUL(mc[j], sike_params.prime_p1[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], sike_params.prime_p1[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} + +#endif // NO_ASM || (!X86_64 && !AARCH64) diff --git a/src/third_party/sike/curve_params.c b/src/third_party/sike/curve_params.c new file mode 100644 index 00000000..a1fbb3f1 --- /dev/null +++ b/src/third_party/sike/curve_params.c @@ -0,0 +1,128 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: supersingular isogeny parameters and generation of functions for P434 +*********************************************************************************************/ + +#include "utils.h" + +// Parameters for isogeny system "SIKE" +const struct params_t sike_params = { + .prime = { + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF), + U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), + U64_TO_WORDS(0x0002341F27177344) + 
}, + .prime_p1 = { + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000), + U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), + U64_TO_WORDS(0x0002341F27177344) + }, + .prime_x2 = { + U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF), + U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC), + U64_TO_WORDS(0x0004683E4E2EE688) + }, + .A_gen = { + U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B), + U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1), + U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F), + U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0 + U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B), + U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA), + U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C), + U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1 + U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6), + U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03), + U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215), + U64_TO_WORDS(0x0001C4CB77542876), // XQA0 + U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050), + U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374), + U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508), + U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1 + U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611), + U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD), + U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3), + U64_TO_WORDS(0x00022A81D8D55643), // XRA0 + U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED), + U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4), + U64_TO_WORDS(0x044961599E379AF8), 
U64_TO_WORDS(0xDB2B94FBF09F27E2), + U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1 + }, + .B_gen = { + U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05), + U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F), + U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9), + U64_TO_WORDS(0x0001BED4772E551F), // XPB0 + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), // XPB1 + U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C), + U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62), + U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F), + U64_TO_WORDS(0x000034080181D8AE), // XQB0 + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), // XQB1 + U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647), + U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D), + U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504), + U64_TO_WORDS(0x00007E8A50F02E37), // XRB0 + U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230), + U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53), + U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B), + U64_TO_WORDS(0x000173FA910377D3) // XRB1 + }, + .mont_R2 = { + U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2), + U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B), + U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A), + U64_TO_WORDS(0x000025A89BCDD12A) + }, + .mont_one = { + U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000), + 
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000), + U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C), + U64_TO_WORDS(0x0000ECEEA7BD2EDA) + }, + .mont_six = { + U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000), + U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0), + U64_TO_WORDS(0x00012559A0403298) + }, + .A_strat = { + 0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, + 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, + 0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01, + 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03, + 0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, + 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01 + }, + .B_strat = { + 0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01, + 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10, + 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, + 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, + 0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01, + 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, + 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01 + } +}; diff 
--git a/src/third_party/sike/fpx.c b/src/third_party/sike/fpx.c new file mode 100644 index 00000000..9917116c --- /dev/null +++ b/src/third_party/sike/fpx.c @@ -0,0 +1,283 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: core functions over GF(p) and GF(p^2) +*********************************************************************************************/ +#include <openssl/base.h> + +#include "utils.h" +#include "fpx.h" + +extern const struct params_t sike_params; + +// Multiprecision squaring, c = a^2 mod p. +static void fpsqr_mont(const felm_t ma, felm_t mc) +{ + dfelm_t temp = {0}; + sike_mpmul(ma, ma, temp); + sike_fprdc(temp, mc); +} + +// Chain to compute a^(p-3)/4 using Montgomery arithmetic. +static void fpinv_chain_mont(felm_t a) +{ + unsigned int i, j; + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + sike_fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]); + + sike_fpcopy(a, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[7], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[12], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[30], tt, tt); + for (i = 0; i < 7; i++) 
fpsqr_mont(tt, tt); + sike_fpmul_mont(t[21], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[19], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[16], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[0], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[20], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[25], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[22], tt, tt); + for (j = 0; j < 35; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[30], tt, tt); + } + sike_fpcopy(tt, a); +} + +// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. 
+static void fpinv_mont(felm_t a) +{ + felm_t tt = {0}; + sike_fpcopy(a, tt); + fpinv_chain_mont(tt); + fpsqr_mont(tt, tt); + fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, a); +} + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) +inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { + uint8_t carry = 0; + for (size_t i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + return carry; +} + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. +inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { + uint32_t borrow = 0; + for (size_t i = 0; i < nwords; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + return borrow; +} +#endif + +// Multiprecision addition, c = a+b. +inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c) +{ +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + mp_add(a, b, c, NWORDS_FIELD); +#else + sike_mpadd_asm(a, b, c); +#endif +} + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. +// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0 +inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); +#else + return sike_mpsubx2_asm(a, b, c); +#endif +} + +// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. +// Inputs should be s.t. 
c > a and c > b +inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + mp_sub(c, a, c, 2*NWORDS_FIELD); + mp_sub(c, b, c, 2*NWORDS_FIELD); +#else + sike_mpdblsubx2_asm(a, b, c); +#endif +} + +// Copy a field element, c = a. +void sike_fpcopy(const felm_t a, felm_t c) { + for (size_t i = 0; i < NWORDS_FIELD; i++) { + c[i] = a[i]; + } +} + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime, where R=2^448 +void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) +{ + dfelm_t temp = {0}; + sike_mpmul(ma, mb, temp); + sike_fprdc(temp, mc); +} + +// Conversion from Montgomery representation to standard representation, +// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. +void sike_from_mont(const felm_t ma, felm_t c) +{ + felm_t one = {0}; + one[0] = 1; + + sike_fpmul_mont(ma, one, c); + sike_fpcorrection(c); +} + +// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). +// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) { + felm_t t1, t2, t3; + + mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1 + sike_fpsub(a->c0, a->c1, t2); // t2 = a0-a1 + mp_addfast(a->c0, a->c0, t3); // t3 = 2a0 + sike_fpmul_mont(t1, t2, c->c0); // c0 = (a0+a1)(a0-a1) + sike_fpmul_mont(t3, a->c1, c->c1); // c1 = 2a0*a1 +} + +// Modular negation, a = -a mod p434. +// Input/output: a in [0, 2*p434-1] +void sike_fpneg(felm_t a) { + uint32_t borrow = 0; + for (size_t i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, sike_params.prime_x2[i], a[i], borrow, a[i]); + } +} + +// Modular division by two, c = a/2 mod p434.
+// Input : a in [0, 2*p434-1] +// Output: c in [0, 2*p434-1] +void sike_fpdiv2(const felm_t a, felm_t c) { + uint32_t carry = 0; + crypto_word_t mask; + + mask = 0 - (crypto_word_t)(a[0] & 1); // If a is odd compute a+p434 + for (size_t i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], sike_params.prime[i] & mask, carry, c[i]); + } + + // Multiprecision right shift by one. + for (size_t i = 0; i < NWORDS_FIELD-1; i++) { + c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1)); + } + c[NWORDS_FIELD-1] >>= 1; +} + +// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. +void sike_fpcorrection(felm_t a) { + uint32_t borrow = 0; + crypto_word_t mask; + + for (size_t i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], sike_params.prime[i], borrow, a[i]); + } + mask = 0 - (crypto_word_t)borrow; + + borrow = 0; + for (size_t i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], sike_params.prime[i] & mask, borrow, a[i]); + } +} + +// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). +// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) { + felm_t t1, t2; + dfelm_t tt1, tt2, tt3; + crypto_word_t mask; + + mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1 + mp_addfast(b->c0, b->c1, t2); // t2 = b0+b1 + sike_mpmul(a->c0, b->c0, tt1); // tt1 = a0*b0 + sike_mpmul(a->c1, b->c1, tt2); // tt2 = a1*b1 + sike_mpmul(t1, t2, tt3); // tt3 = (a0+a1)*(b0+b1) + mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mask = mp_subfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1.
If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0 + + for (size_t i = 0; i < NWORDS_FIELD; i++) { + t1[i] = sike_params.prime[i] & mask; + } + + sike_fprdc(tt3, c->c1); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mp_addfast(&tt1[NWORDS_FIELD], t1, &tt1[NWORDS_FIELD]); + sike_fprdc(tt1, c->c0); // c[0] = a0*b0 - a1*b1 +} + +// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). +void sike_fp2inv_mont(f2elm_t a) { + f2elm_t t1; + + fpsqr_mont(a->c0, t1->c0); // t10 = a0^2 + fpsqr_mont(a->c1, t1->c1); // t11 = a1^2 + sike_fpadd(t1->c0, t1->c1, t1->c0); // t10 = a0^2+a1^2 + fpinv_mont(t1->c0); // t10 = (a0^2+a1^2)^-1 + sike_fpneg(a->c1); // a = a0-i*a1 + sike_fpmul_mont(a->c0, t1->c0, a->c0); + sike_fpmul_mont(a->c1, t1->c0, a->c1); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} diff --git a/src/third_party/sike/fpx.h b/src/third_party/sike/fpx.h new file mode 100644 index 00000000..e6976885 --- /dev/null +++ b/src/third_party/sike/fpx.h @@ -0,0 +1,113 @@ +#ifndef FPX_H_ +#define FPX_H_ + +#include "utils.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +// Modular addition, c = a+b mod p. +void sike_fpadd(const felm_t a, const felm_t b, felm_t c); +// Modular subtraction, c = a-b mod p. +void sike_fpsub(const felm_t a, const felm_t b, felm_t c); +// Modular division by two, c = a/2 mod p. +void sike_fpdiv2(const felm_t a, felm_t c); +// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1]. +void sike_fpcorrection(felm_t a); +// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. +void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c); +// 434-bit Montgomery reduction, c = a mod p. Buffer 'a' is modified after +// call returns.
+void sike_fprdc(dfelm_t a, felm_t c); +// Double 2x434-bit multiprecision subtraction, c = c-a-b +// NOTE(review): mp_dblsubfast in fpx.c passes dfelm_t operands here; consider declaring the parameters as dfelm_t. +void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c); +// Multiprecision subtraction, c = a-b +crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c); +// 434-bit multiprecision addition, c = a+b +void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c); +// Modular negation, a = -a mod p. +void sike_fpneg(felm_t a); +// Copy of a field element, c = a +void sike_fpcopy(const felm_t a, felm_t c); +// Presumably zeroes a field element, a = 0 (definition not visible here; confirm). +void sike_fpzero(felm_t a); +// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time. +void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option); +// Conversion from Montgomery representation to standard representation, +// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. +void sike_from_mont(const felm_t ma, felm_t c); +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448 +void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc); +// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2) +void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); +// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void sike_fp2inv_mont(f2elm_t a); +// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). +void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c); +// Modular correction, a = a in GF(p^2). +void sike_fp2correction(f2elm_t a); + +#if defined(__cplusplus) +} // extern C +#endif + +// GF(p^2) addition, c = a+b in GF(p^2). +#define sike_fp2add(a, b, c) \ +do { \ + sike_fpadd(a->c0, b->c0, c->c0); \ + sike_fpadd(a->c1, b->c1, c->c1); \ +} while(0) + +// GF(p^2) subtraction, c = a-b in GF(p^2). +#define sike_fp2sub(a,b,c) \ +do { \ + sike_fpsub(a->c0, b->c0, c->c0); \ + sike_fpsub(a->c1, b->c1, c->c1); \ +} while(0) + +// Copy a GF(p^2) element, c = a.
+#define sike_fp2copy(a, c) \ +do { \ + sike_fpcopy(a->c0, c->c0); \ + sike_fpcopy(a->c1, c->c1); \ +} while(0) + +// GF(p^2) negation, a = -a in GF(p^2). +#define sike_fp2neg(a) \ +do { \ + sike_fpneg(a->c0); \ + sike_fpneg(a->c1); \ +} while(0) + +// GF(p^2) division by two, c = a/2 in GF(p^2). +#define sike_fp2div2(a, c) \ +do { \ + sike_fpdiv2(a->c0, c->c0); \ + sike_fpdiv2(a->c1, c->c1); \ +} while(0) + +// Modular correction, a = a in GF(p^2). +#define sike_fp2correction(a) \ +do { \ + sike_fpcorrection(a->c0); \ + sike_fpcorrection(a->c1); \ +} while(0) + +// Conversion of a GF(p^2) element to Montgomery representation, +// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). +#define sike_to_fp2mont(a, mc) \ + do { \ + sike_fpmul_mont(a->c0, sike_params.mont_R2, mc->c0); \ + sike_fpmul_mont(a->c1, sike_params.mont_R2, mc->c1); \ + } while (0) + +// Conversion of a GF(p^2) element from Montgomery representation to standard representation, +// c_i = ma_i*R^(-1) = a_i in GF(p^2). +#define sike_from_fp2mont(ma, c) \ +do { \ + sike_from_mont(ma->c0, c->c0); \ + sike_from_mont(ma->c1, c->c1); \ +} while(0) + +#endif // FPX_H_ diff --git a/src/third_party/sike/isogeny.c b/src/third_party/sike/isogeny.c new file mode 100644 index 00000000..6b910e02 --- /dev/null +++ b/src/third_party/sike/isogeny.c @@ -0,0 +1,260 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: elliptic curve and isogeny functions +*********************************************************************************************/ +#include "utils.h" +#include "isogeny.h" +#include "fpx.h" + +static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. 
+ // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). + f2elm_t t0, t1; + + sike_fp2sub(P->X, P->Z, t0); // t0 = X1-Z1 + sike_fp2add(P->X, P->Z, t1); // t1 = X1+Z1 + sike_fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 + sike_fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 + sike_fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + sike_fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + sike_fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + sike_fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + sike_fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + sike_fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + +void sike_xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e) +{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + + memmove(Q, P, sizeof(*P)); + for (size_t i = 0; i < e; i++) { + xDBL(Q, Q, A24plus, C24); + } +} + +void sike_get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff) +{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). 

    sike_fp2sub(P->X, P->Z, coeff[1]);              // coeff[1] = X4-Z4
    sike_fp2add(P->X, P->Z, coeff[2]);              // coeff[2] = X4+Z4
    sike_fp2sqr_mont(P->Z, coeff[0]);               // coeff[0] = Z4^2
    sike_fp2add(coeff[0], coeff[0], coeff[0]);      // coeff[0] = 2*Z4^2
    sike_fp2sqr_mont(coeff[0], C24);                // C24 = 4*Z4^4
    sike_fp2add(coeff[0], coeff[0], coeff[0]);      // coeff[0] = 4*Z4^2
    sike_fp2sqr_mont(P->X, A24plus);                // A24plus = X4^2
    sike_fp2add(A24plus, A24plus, A24plus);         // A24plus = 2*X4^2
    sike_fp2sqr_mont(A24plus, A24plus);             // A24plus = 4*X4^4
}

void sike_eval_4_isog(point_proj_t P, f2elm_t* coeff)
{ // Evaluates a 4-isogeny phi at the point P = (X:Z) of its domain, in place.
  // Inputs: coeff[0..2], the coefficients defining phi (computed in sike_get_4_isog()),
  //         and the projective point P = (X:Z).
  // Output: P <- phi(P) = (X:Z) in the codomain.
  // Notation below: U = (X+Z)*coeff[1], V = (X-Z)*coeff[2], W = coeff[0]*(X+Z)*(X-Z).
    f2elm_t t0, t1;

    sike_fp2add(P->X, P->Z, t0);                    // t0 = X+Z
    sike_fp2sub(P->X, P->Z, t1);                    // t1 = X-Z
    sike_fp2mul_mont(t0, coeff[1], P->X);           // X = U
    sike_fp2mul_mont(t1, coeff[2], P->Z);           // Z = V
    sike_fp2mul_mont(t0, t1, t0);                   // t0 = (X+Z)*(X-Z)
    sike_fp2mul_mont(t0, coeff[0], t0);             // t0 = W
    sike_fp2add(P->X, P->Z, t1);                    // t1 = U + V
    sike_fp2sub(P->X, P->Z, P->Z);                  // Z = U - V (only its square is used)
    sike_fp2sqr_mont(t1, t1);                       // t1 = (U + V)^2
    sike_fp2sqr_mont(P->Z, P->Z);                   // Z = (U - V)^2
    sike_fp2add(t1, t0, P->X);                      // X = W + (U + V)^2
    sike_fp2sub(P->Z, t0, t0);                      // t0 = (U - V)^2 - W
    sike_fp2mul_mont(P->X, t1, P->X);               // Xfinal = [W + (U + V)^2]*(U + V)^2
    sike_fp2mul_mont(P->Z, t0, P->Z);               // Zfinal = (U - V)^2*[(U - V)^2 - W]
}


void sike_xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{ // Tripling of a Montgomery point in projective coordinates (X:Z).
  // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
  // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
  // P and Q may be the same object (sike_xTPLe relies on it): P is only read by the
  // first three calls, before anything is stored to Q.
    f2elm_t t0, t1, t2, t3, t4, t5, t6;

    sike_fp2sub(P->X, P->Z, t0);                    // t0 = X-Z
    sike_fp2sqr_mont(t0, t2);                       // t2 = (X-Z)^2
    sike_fp2add(P->X, P->Z, t1);                    // t1 = X+Z
    sike_fp2sqr_mont(t1, t3);                       // t3 = (X+Z)^2
    sike_fp2add(t0, t1, t4);                        // t4 = 2*X
    sike_fp2sub(t1, t0, t0);                        // t0 = 2*Z
    sike_fp2sqr_mont(t4, t1);                       // t1 = 4*X^2
    sike_fp2sub(t1, t3, t1);                        // t1 = 4*X^2 - (X+Z)^2
    sike_fp2sub(t1, t2, t1);                        // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    sike_fp2mul_mont(t3, A24plus, t5);              // t5 = A24plus*(X+Z)^2
    sike_fp2mul_mont(t3, t5, t3);                   // t3 = (X+Z)^2*t5 = A24plus*(X+Z)^4
    sike_fp2mul_mont(A24minus, t2, t6);             // t6 = A24minus*(X-Z)^2
    sike_fp2mul_mont(t2, t6, t2);                   // t2 = (X-Z)^2*t6 = A24minus*(X-Z)^4
    sike_fp2sub(t2, t3, t3);                        // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
    sike_fp2sub(t5, t6, t2);                        // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
    sike_fp2mul_mont(t1, t2, t1);                   // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
    sike_fp2add(t3, t1, t2);                        // t2 = t3 + t1
    sike_fp2sqr_mont(t2, t2);                       // t2 = t2^2
    sike_fp2mul_mont(t4, t2, Q->X);                 // X3 = 2*X*t2
    sike_fp2sub(t3, t1, t1);                        // t1 = t3 - t1
    sike_fp2sqr_mont(t1, t1);                       // t1 = t1^2
    sike_fp2mul_mont(t0, t1, Q->Z);                 // Z3 = 2*Z*t1
}

void sike_xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
  // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
  // memmove (not memcpy) because callers in sike.c pass the same point as P and Q.
    memmove(Q, P, sizeof(*P));
    for (size_t i = 0; i < e; i++) {
        sike_xTPL(Q, Q, A24minus, A24plus);
    }
}

void sike_get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
  // Input: projective point of order three P = (X3:Z3).
  // Output: the 3-isogenous Montgomery curve with projective coefficient A/C, returned through
  //         A24minus and A24plus, plus the two isogeny coefficients coeff[0] = X-Z and
  //         coeff[1] = X+Z consumed by sike_eval_3_isog().
    f2elm_t t0, t1, t2, t3, t4;

    sike_fp2sub(P->X, P->Z, coeff[0]);              // coeff0 = X-Z
    sike_fp2sqr_mont(coeff[0], t0);                 // t0 = (X-Z)^2
    sike_fp2add(P->X, P->Z, coeff[1]);              // coeff1 = X+Z
    sike_fp2sqr_mont(coeff[1], t1);                 // t1 = (X+Z)^2
    sike_fp2add(t0, t1, t2);                        // t2 = (X+Z)^2 + (X-Z)^2
    sike_fp2add(coeff[0], coeff[1], t3);            // t3 = 2*X
    sike_fp2sqr_mont(t3, t3);                       // t3 = 4*X^2
    sike_fp2sub(t3, t2, t3);                        // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    sike_fp2add(t1, t3, t2);                        // t2 = 4*X^2 - (X-Z)^2
    sike_fp2add(t3, t0, t3);                        // t3 = 4*X^2 - (X+Z)^2
    sike_fp2add(t0, t3, t4);                        // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
    sike_fp2add(t4, t4, t4);                        // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
    sike_fp2add(t1, t4, t4);                        // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
    sike_fp2mul_mont(t2, t4, A24minus);             // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
    sike_fp2add(t1, t2, t4);                        // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
    sike_fp2add(t4, t4, t4);                        // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
    sike_fp2add(t0, t4, t4);                        // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
    sike_fp2mul_mont(t3, t4, t4);                   // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
    sike_fp2sub(t4, A24minus, t0);                  // t0 = t4 - A24minus
    sike_fp2add(A24minus, t0, A24plus);             // A24plus = A24minus + t0, i.e. the t4 product above
}


void sike_eval_3_isog(point_proj_t Q, f2elm_t* coeff)
{ // Evaluates a 3-isogeny phi at the point Q = (X:Z), in place.
  // Inputs: coeff[0..1], the two coefficients computed by sike_get_3_isog() from the
  //         order-3 kernel point, and the projective point Q = (X:Z).
  // Output: the projective point Q <- phi(Q) = (X3:Z3).
    f2elm_t t0, t1, t2;

    sike_fp2add(Q->X, Q->Z, t0);                    // t0 = X+Z
    sike_fp2sub(Q->X, Q->Z, t1);                    // t1 = X-Z
    sike_fp2mul_mont(t0, coeff[0], t0);             // t0 = coeff0*(X+Z)
    sike_fp2mul_mont(t1, coeff[1], t1);             // t1 = coeff1*(X-Z)
    sike_fp2add(t0, t1, t2);                        // t2 = coeff0*(X+Z) + coeff1*(X-Z)
    sike_fp2sub(t1, t0, t0);                        // t0 = coeff1*(X-Z) - coeff0*(X+Z)
    sike_fp2sqr_mont(t2, t2);                       // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
    sike_fp2sqr_mont(t0, t0);                       // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
    sike_fp2mul_mont(Q->X, t2, Q->X);               // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
    sike_fp2mul_mont(Q->Z, t0, Q->Z);               // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
}


void sike_inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{ // 3-way simultaneous inversion: one field inversion plus multiplications.
  // Input: z1,z2,z3
  // Output: 1/z1,1/z2,1/z3 (override inputs).
  // z1 is overwritten last (via t3) because its original value is still needed
  // to form the new z2.
    f2elm_t t0, t1, t2, t3;

    sike_fp2mul_mont(z1, z2, t0);                   // t0 = z1*z2
    sike_fp2mul_mont(z3, t0, t1);                   // t1 = z1*z2*z3
    sike_fp2inv_mont(t1);                           // t1 = 1/(z1*z2*z3)
    sike_fp2mul_mont(z3, t1, t2);                   // t2 = 1/(z1*z2)
    sike_fp2mul_mont(t2, z2, t3);                   // t3 = 1/z1
    sike_fp2mul_mont(t2, z1, z2);                   // z2 = 1/z2
    sike_fp2mul_mont(t0, t1, z3);                   // z3 = 1/z3
    sike_fp2copy(t3, z1);                           // z1 = 1/z1
}


void sike_get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
  // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R.
  // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
  // Net effect of the steps below: A = (xP*xQ + xR*(xP+xQ) - 1)^2 / (4*xP*xQ*xR) - (xP+xQ+xR).
    f2elm_t t0, t1, one = F2ELM_INIT;

    extern const struct params_t sike_params;
    sike_fpcopy(sike_params.mont_one, one->c0);     // one = 1 in Montgomery form (c1 left as initialised)
    sike_fp2add(xP, xQ, t1);                        // t1 = xP+xQ
    sike_fp2mul_mont(xP, xQ, t0);                   // t0 = xP*xQ
    sike_fp2mul_mont(xR, t1, A);                    // A = xR*t1
    sike_fp2add(t0, A, A);                          // A = A+t0
    sike_fp2mul_mont(t0, xR, t0);                   // t0 = t0*xR
    sike_fp2sub(A, one, A);                         // A = A-1
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0
    sike_fp2add(t1, xR, t1);                        // t1 = t1+xR
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0
    sike_fp2sqr_mont(A, A);                         // A = A^2
    sike_fp2inv_mont(t0);                           // t0 = 1/t0
    sike_fp2mul_mont(A, t0, A);                     // A = A*t0
    sike_fp2sub(A, t1, A);                          // Afinal = A-t1
}


void sike_j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
{ // Computes the j-invariant of a Montgomery curve with projective constant.
  // Input: A,C in GF(p^2).
  // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
    f2elm_t t0, t1;

    sike_fp2sqr_mont(A, jinv);                      // jinv = A^2
    sike_fp2sqr_mont(C, t1);                        // t1 = C^2
    sike_fp2add(t1, t1, t0);                        // t0 = t1+t1
    sike_fp2sub(jinv, t0, t0);                      // t0 = jinv-t0
    sike_fp2sub(t0, t1, t0);                        // t0 = t0-t1            (= A^2 - 3*C^2)
    sike_fp2sub(t0, t1, jinv);                      // jinv = t0-t1          (= A^2 - 4*C^2)
    sike_fp2sqr_mont(t1, t1);                       // t1 = t1^2             (= C^4)
    sike_fp2mul_mont(jinv, t1, jinv);               // jinv = jinv*t1        (denominator)
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0            (= 4*(A^2 - 3*C^2))
    sike_fp2sqr_mont(t0, t1);                       // t1 = t0^2
    sike_fp2mul_mont(t0, t1, t0);                   // t0 = t0*t1            (= 64*(A^2 - 3*C^2)^3)
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0
    sike_fp2add(t0, t0, t0);                        // t0 = t0+t0            (= 256*(A^2 - 3*C^2)^3)
    sike_fp2inv_mont(jinv);                         // jinv = 1/jinv
    sike_fp2mul_mont(jinv, t0, jinv);               // jinv = t0*jinv
}


void sike_xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{ // Simultaneous doubling and differential addition.
  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
    f2elm_t t0, t1, t2;

    sike_fp2add(P->X, P->Z, t0);                    // t0 = XP+ZP
    sike_fp2sub(P->X, P->Z, t1);                    // t1 = XP-ZP
    sike_fp2sqr_mont(t0, P->X);                     // XP = (XP+ZP)^2
    sike_fp2sub(Q->X, Q->Z, t2);                    // t2 = XQ-ZQ
    // NOTE(review): presumably normalises t2 before it is multiplied; sike_fp2correction
    // is defined outside this file - confirm against fpx.c.
    sike_fp2correction(t2);
    sike_fp2add(Q->X, Q->Z, Q->X);                  // XQ = XQ+ZQ
    sike_fp2mul_mont(t0, t2, t0);                   // t0 = (XP+ZP)*(XQ-ZQ)
    sike_fp2sqr_mont(t1, P->Z);                     // ZP = (XP-ZP)^2
    sike_fp2mul_mont(t1, Q->X, t1);                 // t1 = (XP-ZP)*(XQ+ZQ)
    sike_fp2sub(P->X, P->Z, t2);                    // t2 = (XP+ZP)^2-(XP-ZP)^2
    sike_fp2mul_mont(P->X, P->Z, P->X);             // XP = (XP+ZP)^2*(XP-ZP)^2
    sike_fp2mul_mont(t2, A24, Q->X);                // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
    sike_fp2sub(t0, t1, Q->Z);                      // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
    sike_fp2add(Q->X, P->Z, P->Z);                  // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
    sike_fp2add(t0, t1, Q->X);                      // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
    sike_fp2mul_mont(P->Z, t2, P->Z);               // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
    sike_fp2sqr_mont(Q->Z, Q->Z);                   // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
    sike_fp2sqr_mont(Q->X, Q->X);                   // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
    sike_fp2mul_mont(Q->Z, xPQ, Q->Z);              // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
}
diff --git a/src/third_party/sike/isogeny.h b/src/third_party/sike/isogeny.h new file mode 100644 index 00000000..18337dd2 --- /dev/null +++ b/src/third_party/sike/isogeny.h @@ -0,0 +1,49 @@
#ifndef ISOGENY_H_
#define ISOGENY_H_

// Computes [2^e](X:Z) on Montgomery curve with projective
// constant via e repeated doublings.
void sike_xDBLe(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
    const f2elm_t C24, size_t e);
// Simultaneous doubling and differential addition:
// P <- 2*P and Q <- P+Q, given the affine difference xPQ = x(P-Q)
// and the curve constant A24 = (A+2)/4. Both points are updated in place.
void sike_xDBLADD(
    point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
    const f2elm_t A24);
// Tripling of a Montgomery point in projective coordinates (X:Z):
// Q <- 3*P, with curve constants A24minus = A-2C and A24plus = A+2C.
void sike_xTPL(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
    const f2elm_t A24plus);
// Computes [3^e](X:Z) on Montgomery curve with projective constant
// via e repeated triplings. Q <- (3^e)*P.
void sike_xTPLe(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
    const f2elm_t A24plus, size_t e);
// Given the x-coordinates of P, Q, and R, returns the value A
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
void sike_get_A(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
// Computes the j-invariant of a Montgomery curve with projective constant (A:C),
// writing it to jinv.
void sike_j_inv(
    const f2elm_t A, const f2elm_t C, f2elm_t jinv);
// Computes the corresponding 4-isogeny of a projective Montgomery
// point (X4:Z4) of order 4. Writes A24plus, C24 and coeff[0..2].
void sike_get_4_isog(
    const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
// Computes the corresponding 3-isogeny of a projective Montgomery
// point (X3:Z3) of order 3. Writes A24minus, A24plus and coeff[0..1].
void sike_get_3_isog(
    const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
    f2elm_t* coeff);
// Evaluates, in place, the 3-isogeny described by coeff[0..1]
// (as produced by sike_get_3_isog) at the projective point Q = (X:Z).
void sike_eval_3_isog(
    point_proj_t Q, f2elm_t* coeff);
// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
void sike_eval_4_isog(
    point_proj_t P, f2elm_t* coeff);
// 3-way simultaneous inversion: z1, z2, z3 are replaced by 1/z1, 1/z2, 1/z3.
void sike_inv_3_way(
    f2elm_t z1, f2elm_t z2, f2elm_t z3);

#endif // ISOGENY_H_
diff --git a/src/third_party/sike/sike.c b/src/third_party/sike/sike.c new file mode 100644 index 00000000..87b74174 --- /dev/null +++ b/src/third_party/sike/sike.c @@ -0,0 +1,531 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
*********************************************************************************************/

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <openssl/bn.h>
#include <openssl/base.h>
#include <openssl/rand.h>
#include <openssl/mem.h>
#include <openssl/sha.h>

#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

extern const struct params_t sike_params;

// SIDH_JINV_BYTESZ is the number of bytes used for encoding a j-invariant.
// The same stride separates the three field elements of an encoded public key.
#define SIDH_JINV_BYTESZ    110U
// SIDH_PRV_A_BITSZ is the number of bits of an SIDH private key (2-isogeny side).
#define SIDH_PRV_A_BITSZ    216U
// SIDH_PRV_B_BITSZ is the number of bits of an SIDH private key (3-isogeny side).
#define SIDH_PRV_B_BITSZ    217U
// MAX_INT_POINTS_ALICE is the number of points used in the 2-isogeny tree computation.
#define MAX_INT_POINTS_ALICE    7U
// MAX_INT_POINTS_BOB is the number of points used in the 3-isogeny tree computation.
#define MAX_INT_POINTS_BOB      8U

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
// Portable fallback, compiled only when the x86-64 assembly version is not used.
// The swap is done with a mask and XORs on every word, so no branch depends on option.
#if !defined(OPENSSL_X86_64) || defined(OPENSSL_NO_ASM)
static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
    crypto_word_t temp;
    for (size_t i = 0; i < NWORDS_FIELD; i++) {
        // X coordinate, c0 component
        temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
        P->X->c0[i] = temp ^ P->X->c0[i];
        Q->X->c0[i] = temp ^ Q->X->c0[i];
        // Z coordinate, c0 component
        temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
        P->Z->c0[i] = temp ^ P->Z->c0[i];
        Q->Z->c0[i] = temp ^ Q->Z->c0[i];
        // X coordinate, c1 component
        temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
        P->X->c1[i] = temp ^ P->X->c1[i];
        Q->X->c1[i] = temp ^ Q->X->c1[i];
        // Z coordinate, c1 component
        temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
        P->Z->c1[i] = temp ^ P->Z->c1[i];
        Q->Z->c1[i] = temp ^ Q->Z->c1[i];
    }
}
#endif

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
// Dispatches to the assembly routine on x86-64 builds with assembly enabled,
// otherwise to the portable sike_cswap above.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
    sike_cswap_asm(P, Q, option);
#else
    sike_cswap(P, Q, option);
#endif
}

// Three-point Montgomery ladder driven by the scalar |m|.
// Inputs: affine x-coordinates xP, xQ and xPQ; the scalar m as little-endian bytes,
//         of which the low SIDH_PRV_A_BITSZ bits (is_A != 0) or SIDH_PRV_B_BITSZ bits
//         (is_A == 0) are consumed; the affine curve coefficient A.
// Output: the projective point R, fully initialised here (callers pass it uninitialised).
// NOTE(review): in the upstream SIDH library this ladder returns x(P + [m]Q) - confirm.
// Each iteration performs the same operations; the scalar bit only feeds the swap mask.
static void ladder3Pt(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
    int is_A, point_proj_t R, const f2elm_t A) {
    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
    f2elm_t A24 = F2ELM_INIT;
    crypto_word_t mask;
    int bit, swap, prevbit = 0;

    const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;

    // Initializing constant
    sike_fpcopy(sike_params.mont_one, A24[0].c0);   // A24 = 1
    sike_fp2add(A24, A24, A24);                     // A24 = 2
    sike_fp2add(A, A24, A24);                       // A24 = A+2
    sike_fp2div2(A24, A24);
    sike_fp2div2(A24, A24); // A24 = (A+2)/4

    // Initializing points: R0 = (xQ:1), R2 = (xPQ:1), R = (xP:1)
    sike_fp2copy(xQ, R0->X);
    sike_fpcopy(sike_params.mont_one, R0->Z[0].c0);
    sike_fp2copy(xPQ, R2->X);
    sike_fpcopy(sike_params.mont_one, R2->Z[0].c0);
    sike_fp2copy(xP, R->X);
    sike_fpcopy(sike_params.mont_one, R->Z[0].c0);
    // R comes from the caller without an initialiser, so its Z.c1 is cleared explicitly.
    memset(R->Z->c1, 0, sizeof(R->Z->c1));

    // Main loop: scan the scalar from bit 0 upwards
    for (size_t i = 0; i < nbits; i++) {
        bit = (m[i >> 3] >> (i & 7)) & 1;
        swap = bit ^ prevbit;
        prevbit = bit;
        mask = 0 - (crypto_word_t)swap;             // all-ones when swap == 1, else zero

        sike_fp2cswap(R, R2, mask);
        sike_xDBLADD(R0, R2, R->X, A24);
        // sike_xDBLADD is given only R->X as the difference; multiply in R->Z here.
        sike_fp2mul_mont(R2->X, R->Z, R2->X);
    }

    // Undo the swap left pending by the last processed bit.
    mask = 0 - (crypto_word_t)prevbit;
    sike_fp2cswap(R, R2, mask);
}

// Initialization of basis points
// |gen| holds six consecutive NWORDS_FIELD-word field elements, in the order
// XP.c0, XP.c1, XQ.c0, XQ.c1, XR.c0, XR.c1.
static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
    sike_fpcopy(gen,                  XP->c0);
    sike_fpcopy(gen +   NWORDS_FIELD, XP->c1);
    sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
    sike_fpcopy(gen + 3*NWORDS_FIELD, XQ->c1);
    sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c0);
    sike_fpcopy(gen + 5*NWORDS_FIELD, XR->c1);
}

// Conversion of GF(p^2) element from Montgomery to standard representation,
// followed by serialization. Writes 2*FIELD_BYTESZ bytes to |enc|: c0 first,
// then c1, each little endian.
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
    f2elm_t t;
    sike_from_fp2mont(x, t);

    // convert to bytes in little endian form
    for (size_t i=0; i<FIELD_BYTESZ; i++) {
        enc[i+           0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
        enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
    }
}

// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
// Reads 2*FIELD_BYTESZ bytes from |enc|: each GF(p) coordinate occupies FIELD_BYTESZ octets
// in little endian format (i.e., the least significant octet is located in the lowest
// memory address), c0 first and then c1.
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
    memset(t[0].c0, 0, sizeof(t[0].c0));
    memset(t[0].c1, 0, sizeof(t[0].c1));
    // convert bytes in little endian form to f2elm_t
    for (size_t i = 0; i < FIELD_BYTESZ; i++) {
        t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+           0]) << (8*(i%LSZ));
        t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (8*(i%LSZ));
    }
    sike_to_fp2mont(t, t);
}

// Alice's ephemeral public key generation
// Input:  a private key skA, little endian; its low SIDH_PRV_A_BITSZ bits are used.
+// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes. +static void gen_iso_A(const uint8_t* skA, uint8_t* pkA) +{ + point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + point_proj_t phiP = POINT_PROJ_INIT; + point_proj_t phiQ = POINT_PROJ_INIT; + point_proj_t phiR = POINT_PROJ_INIT; + f2elm_t XPA, XQA, XRA, coeff[3]; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t C24 = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize basis points + sike_init_basis(sike_params.A_gen, XPA, XQA, XRA); + sike_init_basis(sike_params.B_gen, phiP->X, phiQ->X, phiR->X); + sike_fpcopy(sike_params.mont_one, (phiP->Z)->c0); + sike_fpcopy(sike_params.mont_one, (phiQ->Z)->c0); + sike_fpcopy(sike_params.mont_one, (phiR->Z)->c0); + + // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1 + sike_fpcopy(sike_params.mont_one, A24plus->c0); + sike_fp2add(A24plus, A24plus, A24plus); + sike_fp2add(A24plus, A24plus, C24); + sike_fp2add(A24plus, C24, A); + sike_fp2add(C24, C24, A24plus); + + // Retrieve kernel point + ladder3Pt(XPA, XQA, XRA, skA, 1, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < A_max; row++) { + while (index < A_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = sike_params.A_strat[ii++]; + sike_xDBLe(R, R, A24plus, C24, (2*m)); + index += m; + } + sike_get_4_isog(R, A24plus, C24, coeff); + + for (size_t i = 0; i < npts; i++) { + sike_eval_4_isog(pts[i], coeff); + } + sike_eval_4_isog(phiP, coeff); + sike_eval_4_isog(phiQ, coeff); + sike_eval_4_isog(phiR, coeff); + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + sike_get_4_isog(R, A24plus, C24, coeff); + sike_eval_4_isog(phiP, coeff); + sike_eval_4_isog(phiQ, coeff); + sike_eval_4_isog(phiR, coeff); + + sike_inv_3_way(phiP->Z, phiQ->Z, phiR->Z); + 
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); + sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); + sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); + + // Format public key + sike_fp2_encode(phiP->X, pkA); + sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ); + sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ); +} + +// Bob's ephemeral key-pair generation +// It produces a private key skB and computes the public key pkB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. +static void gen_iso_B(const uint8_t* skB, uint8_t* pkB) +{ + point_proj_t R, pts[MAX_INT_POINTS_BOB]; + point_proj_t phiP = POINT_PROJ_INIT; + point_proj_t phiQ = POINT_PROJ_INIT; + point_proj_t phiR = POINT_PROJ_INIT; + f2elm_t XPB, XQB, XRB, coeff[3]; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t A24minus = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize basis points + sike_init_basis(sike_params.B_gen, XPB, XQB, XRB); + sike_init_basis(sike_params.A_gen, phiP->X, phiQ->X, phiR->X); + sike_fpcopy(sike_params.mont_one, (phiP->Z)->c0); + sike_fpcopy(sike_params.mont_one, (phiQ->Z)->c0); + sike_fpcopy(sike_params.mont_one, (phiR->Z)->c0); + + // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 + sike_fpcopy(sike_params.mont_one, A24plus->c0); + sike_fp2add(A24plus, A24plus, A24plus); + sike_fp2add(A24plus, A24plus, A24minus); + sike_fp2add(A24plus, A24minus, A); + sike_fp2add(A24minus, A24minus, A24plus); + + // Retrieve kernel point + ladder3Pt(XPB, XQB, XRB, skB, 0, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < B_max; row++) { + while (index < B_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = sike_params.B_strat[ii++]; + sike_xTPLe(R, R, A24minus, A24plus, m); + index += m; + } + 
sike_get_3_isog(R, A24minus, A24plus, coeff); + + for (size_t i = 0; i < npts; i++) { + sike_eval_3_isog(pts[i], coeff); + } + sike_eval_3_isog(phiP, coeff); + sike_eval_3_isog(phiQ, coeff); + sike_eval_3_isog(phiR, coeff); + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + sike_get_3_isog(R, A24minus, A24plus, coeff); + sike_eval_3_isog(phiP, coeff); + sike_eval_3_isog(phiQ, coeff); + sike_eval_3_isog(phiR, coeff); + + sike_inv_3_way(phiP->Z, phiQ->Z, phiR->Z); + sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); + sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); + sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); + + // Format public key + sike_fp2_encode(phiP->X, pkB); + sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ); + sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ); +} + +// Alice's ephemeral shared secret computation +// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB +// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. +// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes. 
+static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA) +{ + point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + f2elm_t coeff[3], PKB[3], jinv; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t C24 = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize images of Bob's basis + fp2_decode(pkB, PKB[0]); + fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]); + fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]); + + // Initialize constants + sike_get_A(PKB[0], PKB[1], PKB[2], A); + sike_fpadd(sike_params.mont_one, sike_params.mont_one, C24->c0); + sike_fp2add(A, C24, A24plus); + sike_fpadd(C24->c0, C24->c0, C24->c0); + + // Retrieve kernel point + ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < A_max; row++) { + while (index < A_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = sike_params.A_strat[ii++]; + sike_xDBLe(R, R, A24plus, C24, (2*m)); + index += m; + } + sike_get_4_isog(R, A24plus, C24, coeff); + + for (size_t i = 0; i < npts; i++) { + sike_eval_4_isog(pts[i], coeff); + } + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + sike_get_4_isog(R, A24plus, C24, coeff); + sike_fp2add(A24plus, A24plus, A24plus); + sike_fp2sub(A24plus, C24, A24plus); + sike_fp2add(A24plus, A24plus, A24plus); + sike_j_inv(A24plus, C24, jinv); + sike_fp2_encode(jinv, ssA); +} + +// Bob's ephemeral shared secret computation +// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA +// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes. 
+static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB) +{ + point_proj_t R, pts[MAX_INT_POINTS_BOB]; + f2elm_t coeff[3], PKB[3], jinv; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t A24minus = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize images of Alice's basis + fp2_decode(pkA, PKB[0]); + fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]); + fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]); + + // Initialize constants + sike_get_A(PKB[0], PKB[1], PKB[2], A); + sike_fpadd(sike_params.mont_one, sike_params.mont_one, A24minus->c0); + sike_fp2add(A, A24minus, A24plus); + sike_fp2sub(A, A24minus, A24minus); + + // Retrieve kernel point + ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < B_max; row++) { + while (index < B_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = sike_params.B_strat[ii++]; + sike_xTPLe(R, R, A24minus, A24plus, m); + index += m; + } + sike_get_3_isog(R, A24minus, A24plus, coeff); + + for (size_t i = 0; i < npts; i++) { + sike_eval_3_isog(pts[i], coeff); + } + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + sike_get_3_isog(R, A24minus, A24plus, coeff); + sike_fp2add(A24plus, A24minus, A); + sike_fp2add(A, A, A); + sike_fp2sub(A24plus, A24minus, A24plus); + sike_j_inv(A, A24plus, jinv); + sike_fp2_encode(jinv, ssB); +} + +int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ], + uint8_t out_pub[SIKE_PUB_BYTESZ]) { + int ret = 0; + + // Calculate private key for Alice. 
Needs to be in range [0, 2^0xFA - 1] and < + // 253 bits + BIGNUM *bn_sidh_prv = BN_new(); + if (!bn_sidh_prv || + !BN_rand(bn_sidh_prv, SIDH_PRV_B_BITSZ, BN_RAND_TOP_ONE, + BN_RAND_BOTTOM_ANY) || + !BN_bn2le_padded(out_priv, BITS_TO_BYTES(SIDH_PRV_B_BITSZ), + bn_sidh_prv)) { + goto end; + } + + gen_iso_B(out_priv, out_pub); + ret = 1; + +end: + BN_free(bn_sidh_prv); + return ret; +} + +void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], + uint8_t out_ciphertext[SIKE_CT_BYTESZ], + const uint8_t pub_key[SIKE_PUB_BYTESZ]) { + // Secret buffer is reused by the function to store some ephemeral + // secret data. It's size must be maximum of SHA256_CBLOCK, + // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. + uint8_t secret[SHA256_CBLOCK]; + uint8_t j[SIDH_JINV_BYTESZ]; + uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ]; + SHA256_CTX ctx; + + // Generate secret key for A + // secret key A = SHA256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ + RAND_bytes(temp, SIKE_MSG_BYTESZ); + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ); + SHA256_Update(&ctx, pub_key, SIKE_PUB_BYTESZ); + SHA256_Final(secret, &ctx); + + // Generate public key for A - first part of the ciphertext + gen_iso_A(secret, out_ciphertext); + + // Generate c1: + // h = SHA256(j-invariant) + // c1 = h ^ m + ex_iso_A(secret, pub_key, j); + SHA256_Init(&ctx); + SHA256_Update(&ctx, j, sizeof(j)); + SHA256_Final(secret, &ctx); + + // c1 = h ^ m + uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ]; + for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { + c1[i] = temp[i] ^ secret[i]; + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ); + SHA256_Update(&ctx, out_ciphertext, SIKE_CT_BYTESZ); + SHA256_Final(secret, &ctx); + // Generate shared secret out_shared_key = SHA256(m||out_ciphertext) + memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); +} + +void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], + const uint8_t ciphertext[SIKE_CT_BYTESZ], + const uint8_t pub_key[SIKE_PUB_BYTESZ], + 
const uint8_t priv_key[SIKE_PRV_BYTESZ]) { + // Secret buffer is reused by the function to store some ephemeral + // secret data. Its size must be the maximum of SHA256_CBLOCK, + // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. + uint8_t secret[SHA256_CBLOCK]; + uint8_t j[SIDH_JINV_BYTESZ]; + uint8_t c0[SIKE_PUB_BYTESZ]; + uint8_t temp[SIKE_MSG_BYTESZ]; + uint8_t shared_nok[SIKE_MSG_BYTESZ]; + SHA256_CTX ctx; + + // This is OK as we are only using ephemeral keys in BoringSSL + RAND_bytes(shared_nok, SIKE_MSG_BYTESZ); + + // Recover m + // Let ciphertext = c0 || c1 - both have fixed sizes + // m = F(j-invariant(c0, priv_key)) ^ c1 + ex_iso_B(priv_key, ciphertext, j); + + SHA256_Init(&ctx); + SHA256_Update(&ctx, j, sizeof(j)); + SHA256_Final(secret, &ctx); + + const uint8_t *c1 = &ciphertext[sizeof(c0)]; + for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { + temp[i] = c1[i] ^ secret[i]; + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ); + SHA256_Update(&ctx, pub_key, SIKE_PUB_BYTESZ); + SHA256_Final(secret, &ctx); + + // Recover c0 = public key A + gen_iso_A(secret, c0); + crypto_word_t ok = constant_time_is_zero_w( + CRYPTO_memcmp(c0, ciphertext, SIKE_PUB_BYTESZ)); + for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { + temp[i] = constant_time_select_8(ok, temp[i], shared_nok[i]); + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ); + SHA256_Update(&ctx, ciphertext, SIKE_CT_BYTESZ); + SHA256_Final(secret, &ctx); + + // Generate shared secret out_shared_key = SHA256(m||ciphertext) + memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); +} diff --git a/src/third_party/sike/sike.h b/src/third_party/sike/sike.h new file mode 100644 index 00000000..5819ebf4 --- /dev/null +++ b/src/third_party/sike/sike.h @@ -0,0 +1,64 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: API header file for SIKE 
+*********************************************************************************************/ + +#ifndef SIKE_H_ +#define SIKE_H_ + +#include <stdint.h> +#include <openssl/base.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +/* SIKE + * + * SIKE is an isogeny-based post-quantum key encapsulation mechanism. Description of the + * algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code + * is based on "Additional_Implementations" from PQC NIST submission package which can + * be found here: + * https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip + * + * [SIKE] https://sike.org/files/SIDH-spec.pdf + */ + +// SIKE_PUB_BYTESZ is the number of bytes in a public key. +#define SIKE_PUB_BYTESZ 330 +// SIKE_PRV_BYTESZ is the number of bytes in a private key. +#define SIKE_PRV_BYTESZ 28 +// SIKE_SS_BYTESZ is the number of bytes in a shared key. +#define SIKE_SS_BYTESZ 16 +// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated +// with the public key (see 1.4 of SIKE). +#define SIKE_MSG_BYTESZ 16 +// SIKE_CT_BYTESZ is the number of bytes in a ciphertext. +#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ) + +// SIKE_keypair outputs a public and secret key. Internally it uses BN_rand() as +// an entropy source. In case of success function returns 1, otherwise 0. +OPENSSL_EXPORT int SIKE_keypair( + uint8_t out_priv[SIKE_PRV_BYTESZ], + uint8_t out_pub[SIKE_PUB_BYTESZ]); + +// SIKE_encaps generates and encrypts a random session key, writing those values to +// |out_shared_key| and |out_ciphertext|, respectively. +OPENSSL_EXPORT void SIKE_encaps( + uint8_t out_shared_key[SIKE_SS_BYTESZ], + uint8_t out_ciphertext[SIKE_CT_BYTESZ], + const uint8_t pub_key[SIKE_PUB_BYTESZ]); + +// SIKE_decaps outputs a random session key, writing it to |out_shared_key|. 
+OPENSSL_EXPORT void SIKE_decaps( + uint8_t out_shared_key[SIKE_SS_BYTESZ], + const uint8_t ciphertext[SIKE_CT_BYTESZ], + const uint8_t pub_key[SIKE_PUB_BYTESZ], + const uint8_t priv_key[SIKE_PRV_BYTESZ]); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/third_party/sike/sike_test.cc b/src/third_party/sike/sike_test.cc new file mode 100644 index 00000000..2180a528 --- /dev/null +++ b/src/third_party/sike/sike_test.cc @@ -0,0 +1,251 @@ +/* Copyright (c) 2018, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ + +#include <gtest/gtest.h> +#include <stdint.h> + +#include "sike.h" +#include "fpx.h" +#include "../../crypto/test/abi_test.h" + +TEST(SIKE, RoundTrip) { + uint8_t sk[SIKE_PRV_BYTESZ] = {0}; + uint8_t pk[SIKE_PUB_BYTESZ] = {0}; + uint8_t ct[SIKE_CT_BYTESZ] = {0}; + uint8_t ss_enc[SIKE_SS_BYTESZ] = {0}; + uint8_t ss_dec[SIKE_SS_BYTESZ] = {0}; + + for (size_t i = 0; i < 30; i++) { + EXPECT_EQ(SIKE_keypair(sk, pk), 1); + SIKE_encaps(ss_enc, ct, pk); + SIKE_decaps(ss_dec, ct, pk, sk); + + EXPECT_EQ(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0); + } +} + +TEST(SIKE, Decapsulation) { + const uint8_t sk[SIKE_PRV_BYTESZ] = { + 0xB1, 0xFD, 0x34, 0x42, 0xDB, 0x02, 0xBC, 0x9D, 0x4C, 0xD0, + 0x72, 0x34, 0x4D, 0xBD, 0x06, 0xDF, 0x1C, 0x7D, 0x0A, 0x88, + 0xB2, 0x50, 0xC4, 0xF6, 0xAE, 0xE8, 0x25, 0x01}; + + const uint8_t pk[SIKE_PUB_BYTESZ] = { + 0x6D, 0x8D, 0xF5, 0x7B, 0xCD, 0x47, 0xCA, 0xCB, 0x7A, 0x38, 0xB7, 0xA6, + 0x90, 0xB7, 0x37, 0x03, 0xD4, 0x6F, 0x27, 0x73, 0x74, 0x17, 0x5A, 0xA4, + 0x0D, 0xC6, 0x81, 0xAD, 0xDB, 0xF7, 0x18, 0xB2, 0x3C, 0x30, 0xCF, 0xAA, + 0x08, 0x11, 0x91, 0xCC, 0x27, 0x4E, 0xF1, 0xA6, 0xB7, 0xDA, 0xD2, 0xCF, + 0x99, 0x7F, 0xF7, 0xE1, 0xD0, 0xCE, 0x00, 0xD2, 0x4B, 0xA4, 0x33, 0xB4, + 0x87, 0x01, 0x3F, 0x02, 0xF7, 0xF9, 0xDE, 0xC3, 0x60, 0x62, 0xDA, 0x3F, + 0x74, 0xA9, 0x44, 0xBE, 0x19, 0xD5, 0x03, 0x2A, 0x79, 0x8C, 0xA7, 0xFF, + 0xEA, 0xB3, 0xBB, 0xB5, 0xD4, 0x1D, 0x8F, 0x92, 0xCE, 0x62, 0x6E, 0x99, + 0x24, 0xD7, 0x57, 0xFA, 0xCD, 0xB6, 0xE2, 0x8E, 0xFD, 0x22, 0x0E, 0x31, + 0x21, 0x01, 0x8D, 0x79, 0xF8, 0x3E, 0x27, 0xEC, 0x43, 0x40, 0xDB, 0x82, + 0xE5, 0xEB, 0x6C, 0x97, 0x66, 0x29, 0x15, 0x68, 0xB7, 0x4D, 0x84, 0xD1, + 0x8A, 0x0B, 0x12, 0x36, 0x2C, 0x0C, 0x0A, 0x6E, 0x4E, 0xDE, 0xA5, 0x8A, + 0xDE, 0x77, 0xDD, 0x70, 0x49, 0x73, 0xAC, 0x27, 0x6D, 0x8D, 0x25, 0x9A, + 0xE4, 0x25, 0xE8, 0x95, 0x8F, 0xFE, 0x90, 0x3B, 0x00, 0x69, 0x20, 0xE8, + 0x7C, 0xA5, 0xF5, 0x79, 0xC0, 0x61, 0x51, 0x91, 0x35, 0x25, 0x3F, 0x17, + 0x2F, 0x70, 0x73, 0xF0, 0x89, 0xB5, 0xC8, 
0x25, 0xB8, 0xE5, 0x7E, 0x34, + 0xDD, 0x11, 0xE5, 0xD6, 0xC3, 0xD5, 0x29, 0x89, 0xC6, 0x2C, 0x99, 0x53, + 0x1D, 0x2C, 0x77, 0xB0, 0xB6, 0xA1, 0xBD, 0x79, 0xFB, 0x4A, 0xC2, 0x48, + 0x4C, 0x62, 0x51, 0x00, 0xE3, 0x91, 0x2A, 0xCB, 0x84, 0x03, 0x5D, 0x2D, + 0xC8, 0x33, 0xE9, 0x14, 0xBF, 0x74, 0x21, 0xBC, 0xF4, 0x76, 0xE5, 0x42, + 0xB8, 0xBD, 0xE2, 0xE7, 0x20, 0x95, 0x54, 0xF2, 0xED, 0xC0, 0x79, 0x38, + 0x1E, 0xD2, 0xEA, 0x1A, 0x63, 0x85, 0xE7, 0x3A, 0xDA, 0xAD, 0xAB, 0x1B, + 0x1E, 0x19, 0x9E, 0x73, 0xD0, 0x10, 0x2E, 0x38, 0xAC, 0x8B, 0x00, 0x6A, + 0x30, 0x2C, 0x3D, 0x70, 0x8E, 0x39, 0x6D, 0xC0, 0x12, 0x61, 0x7D, 0x2A, + 0x0A, 0x04, 0x95, 0x8E, 0x09, 0x3C, 0x7B, 0xEC, 0x2E, 0xBC, 0xE8, 0xE8, + 0xE8, 0x37, 0x29, 0xC4, 0x7E, 0x76, 0x48, 0xB9, 0x3B, 0x72, 0xE5, 0x99, + 0x9B, 0xF9, 0xE3, 0x99, 0x72, 0x3F, 0x35, 0x29, 0x85, 0xE0, 0xC8, 0xBF, + 0xB1, 0x6B, 0xB1, 0x6E, 0x72, 0x00}; + + const uint8_t ct[SIKE_CT_BYTESZ] = { + 0xFF, 0xEB, 0xEF, 0x4A, 0xC0, 0x57, 0x0F, 0x26, 0xAC, 0x76, 0xA8, 0xB0, + 0xA3, 0x5D, 0x9C, 0xD9, 0x25, 0xD1, 0x7F, 0x92, 0x5D, 0xF4, 0x23, 0x34, + 0xC3, 0x03, 0x10, 0xE1, 0xB0, 0x24, 0x9B, 0x44, 0x58, 0x26, 0x13, 0x56, + 0x83, 0x43, 0x72, 0x69, 0x28, 0x0D, 0x55, 0x07, 0x1F, 0xDB, 0xC0, 0x23, + 0x34, 0x83, 0x1A, 0x09, 0x9B, 0x80, 0x00, 0x64, 0x56, 0xDC, 0x79, 0x7A, + 0xD2, 0xCE, 0x23, 0xC9, 0x72, 0x27, 0xFC, 0x8D, 0xAB, 0xBF, 0xD3, 0x17, + 0xF6, 0x91, 0x7B, 0x15, 0x93, 0x83, 0x8A, 0x4F, 0x6C, 0xCA, 0x4A, 0x94, + 0xDA, 0xC7, 0x9D, 0xB6, 0xD6, 0xBA, 0xBD, 0x81, 0x9A, 0x78, 0xE5, 0xE5, + 0xBE, 0x17, 0xBC, 0xCB, 0xC8, 0x23, 0x80, 0x5F, 0x75, 0xF8, 0xDB, 0x51, + 0x55, 0x00, 0x25, 0x33, 0x52, 0x64, 0xB2, 0xD6, 0xD8, 0x9A, 0x2A, 0x9E, + 0x29, 0x99, 0x13, 0x33, 0xE2, 0xA7, 0x98, 0xAC, 0xD7, 0x79, 0x5C, 0x2F, + 0xBA, 0x07, 0xC3, 0x03, 0x37, 0xD6, 0xE6, 0xB5, 0xA1, 0xF5, 0x29, 0xB6, + 0xF6, 0xC0, 0x5C, 0x44, 0x68, 0x2B, 0x0B, 0xF5, 0x00, 0x01, 0x44, 0xD5, + 0xCC, 0x23, 0xB5, 0x27, 0x4F, 0xCA, 0xB4, 0x05, 0x01, 0xF9, 0xD4, 0x41, + 0xE0, 0xE1, 0x1E, 0xCF, 0xA9, 0xBC, 
0x79, 0xD7, 0xD5, 0xF5, 0x3C, 0xE6, + 0x93, 0xF4, 0x6C, 0x84, 0x5A, 0x2C, 0x4B, 0xE4, 0x91, 0xB2, 0xB2, 0xB8, + 0xAD, 0x74, 0x9A, 0x69, 0x79, 0x4C, 0x84, 0xB7, 0xBF, 0xF1, 0x68, 0x4B, + 0xAE, 0x0F, 0x7F, 0x45, 0x3B, 0x18, 0x3F, 0xFA, 0x00, 0x48, 0xE0, 0x3A, + 0xE2, 0xC0, 0xAE, 0x00, 0xCE, 0x90, 0x28, 0xA4, 0x1B, 0xBE, 0xCA, 0x0C, + 0x21, 0x29, 0x64, 0x30, 0x5E, 0x35, 0xAD, 0xFD, 0x83, 0x47, 0x40, 0x6D, + 0x15, 0x56, 0xFC, 0xF8, 0x5F, 0xAB, 0x81, 0xFE, 0x6B, 0xE9, 0x6B, 0xED, + 0x27, 0x35, 0x7C, 0xD8, 0x2C, 0xD4, 0xF2, 0x11, 0xE6, 0xAF, 0xDF, 0xB8, + 0x91, 0x96, 0xEB, 0xF7, 0x4C, 0x8D, 0x70, 0x77, 0x90, 0x81, 0x00, 0x09, + 0x19, 0x27, 0x8A, 0x9E, 0xB6, 0x1A, 0xE9, 0xAC, 0x6C, 0xC9, 0xF8, 0xEA, + 0xA2, 0x34, 0xB8, 0xAC, 0xB3, 0xB3, 0x68, 0xA1, 0xB7, 0x29, 0x55, 0xCA, + 0x40, 0x23, 0x92, 0x5C, 0x0C, 0x79, 0x6B, 0xD6, 0x9F, 0x5B, 0xD2, 0xE6, + 0xAE, 0x04, 0xCB, 0xEC, 0xC7, 0x88, 0x18, 0xDB, 0x7A, 0xE6, 0xD6, 0xC9, + 0x39, 0xFD, 0x93, 0x9B, 0xC8, 0x01, 0x6F, 0x3E, 0x6C, 0x90, 0x3E, 0x73, + 0x76, 0x99, 0x7C, 0x48, 0xDA, 0x68, 0x48, 0x80, 0x2B, 0x63}; + + const uint8_t ss_exp[SIKE_SS_BYTESZ] = {0xA1, 0xF9, 0x5A, 0x67, 0xB9, 0x3D, + 0x1E, 0x72, 0xE8, 0xC5, 0x71, 0xF1, + 0x4C, 0xB2, 0xAA, 0x6D}; + + uint8_t ss_dec[SIKE_SS_BYTESZ] = {0}; + SIKE_decaps(ss_dec, ct, pk, sk); + EXPECT_EQ(memcmp(ss_dec, ss_exp, sizeof(ss_exp)), 0); +} + +// SIKE_encaps and SIKE_keypair doesn't return zeros. 
+TEST(SIKE, NonZero) { + uint8_t sk[SIKE_PRV_BYTESZ] = {0}; + uint8_t pk[SIKE_PUB_BYTESZ] = {0}; + uint8_t ct[SIKE_CT_BYTESZ] = {0}; + uint8_t ss[SIKE_SS_BYTESZ] = {0}; + + // Check secret and public key returned by SIKE_keypair + EXPECT_EQ(SIKE_keypair(sk, pk), 1); + uint8_t tmp = 0; + for (size_t i = 0; i < sizeof(sk); i++) { + tmp |= sk[i]; + } + EXPECT_NE(tmp, 0); + + tmp = 0; + for (size_t i = 0; i < sizeof(pk); i++) { + tmp |= pk[i]; + } + EXPECT_NE(tmp, 0); + + // Check shared secret and ciphertext returned by SIKE_encaps + SIKE_encaps(ss, ct, pk); + tmp = 0; + for (size_t i = 0; i < sizeof(ct); i++) { + tmp |= ct[i]; + } + EXPECT_NE(tmp, 0); + + tmp = 0; + for (size_t i = 0; i < sizeof(ss); i++) { + tmp |= ss[i]; + } + EXPECT_NE(tmp, 0); +} + +TEST(SIKE, Negative) { + uint8_t sk[SIKE_PRV_BYTESZ] = {0}; + uint8_t pk[SIKE_PUB_BYTESZ] = {0}; + uint8_t ct[SIKE_CT_BYTESZ] = {0}; + uint8_t ss_enc[SIKE_SS_BYTESZ] = {0}; + uint8_t ss_dec[SIKE_SS_BYTESZ] = {0}; + + EXPECT_EQ(SIKE_keypair(sk, pk), 1); + SIKE_encaps(ss_enc, ct, pk); + + // Change ciphertext + uint8_t ct_tmp[SIKE_CT_BYTESZ] = {0}; + memcpy(ct_tmp, ct, sizeof(ct)); + ct_tmp[0] = ~ct_tmp[0]; + SIKE_decaps(ss_dec, ct_tmp, pk, sk); + EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0); + + // Change secret key + uint8_t sk_tmp[SIKE_PRV_BYTESZ] = {0}; + memcpy(sk_tmp, sk, sizeof(sk)); + sk_tmp[0] = ~sk_tmp[0]; + SIKE_decaps(ss_dec, ct, pk, sk_tmp); + EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0); + + // Change public key + uint8_t pk_tmp[SIKE_PUB_BYTESZ] = {0}; + memcpy(pk_tmp, pk, sizeof(pk)); + pk_tmp[0] = ~pk_tmp[0]; + SIKE_decaps(ss_dec, ct, pk_tmp, sk); + EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0); +} + +TEST(SIKE, Unaligned) { + alignas(4) uint8_t priv[SIKE_PRV_BYTESZ + 1]; + alignas(4) uint8_t pub[SIKE_PUB_BYTESZ + 1]; + alignas(4) uint8_t shared_key1[SIKE_SS_BYTESZ + 1]; + alignas(4) uint8_t ciphertext[SIKE_CT_BYTESZ + 1]; + alignas(4) uint8_t shared_key2[SIKE_SS_BYTESZ + 1]; + + 
ASSERT_TRUE(SIKE_keypair(priv + 1, pub + 1)); + SIKE_encaps(shared_key1 + 1, ciphertext + 1, pub + 1); + SIKE_decaps(shared_key2 + 1, ciphertext + 1, pub + 1, priv + 1); + + EXPECT_EQ(memcmp(shared_key1 + 1, shared_key2 + 1, SIKE_SS_BYTESZ), 0); +} + +#if defined(SUPPORTS_ABI_TEST) && \ + (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) +TEST(SIKE, ABI) { + felm_t a, b, c; + dfelm_t d, e, f; + CHECK_ABI(sike_fpadd, a, b, c); + CHECK_ABI(sike_fpsub, a, b, c); + CHECK_ABI(sike_mpmul, a, b, d); + CHECK_ABI(sike_fprdc, d, a); + CHECK_ABI(sike_mpadd_asm, a, b, c); + CHECK_ABI(sike_mpsubx2_asm, d, e, f); + CHECK_ABI(sike_mpdblsubx2_asm, d, e, f); +} + +// Additional tests for checking if assembly implementation +// of MUL and REDC handles carry chains correctly. +TEST(SIKE, CarryChains) { + // Expected results + const dfelm_t exp_mul = { + 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + }; + + const felm_t exp_redc = { + 0x93AA0C8C2D3235BE, 0xA8CD35DDDE399B46, 0xB9BBA5469509CA65, + 0x6B2FB3A5A2FB86E4, 0x585591BA6DBE862C, 0xD92D3FF5FE0938F2, + 0x0001E1F0EE75A1E1 + }; + + // Input + dfelm_t in14 = { + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF + }; + + felm_t in7 = { + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF + }; + + dfelm_t res; + sike_mpmul(in7, in7, res); + EXPECT_EQ(memcmp(exp_mul, res, sizeof(exp_mul)), 0); + + // modifies in14 and in7 + sike_fprdc(in14, in7); + 
EXPECT_EQ(memcmp(exp_redc, in7, sizeof(exp_redc)), 0); +} +#endif // SUPPORTS_ABI_TEST && (X86_64 || AARCH64) diff --git a/src/third_party/sike/utils.h b/src/third_party/sike/utils.h new file mode 100644 index 00000000..cbc83293 --- /dev/null +++ b/src/third_party/sike/utils.h @@ -0,0 +1,145 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: internal header file for P434 +*********************************************************************************************/ + +#ifndef UTILS_H_ +#define UTILS_H_ + +#include <openssl/base.h> + +#include "../crypto/internal.h" +#include "sike.h" + +// Conversion macro from number of bits to number of bytes +#define BITS_TO_BYTES(nbits) (((nbits)+7)/8) + +// Bit size of the field +#define BITS_FIELD 434 +// Byte size of the field +#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD) +// Number of 64-bit words of a 224-bit element +#define NBITS_ORDER 224 +#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) +// Number of elements in Alice's strategy +#define A_max 108 +// Number of elements in Bob's strategy +#define B_max 137 +// Word size in bits +#define RADIX sizeof(crypto_word_t)*8 +// Byte size of a limb +#define LSZ sizeof(crypto_word_t) + +#if defined(OPENSSL_64_BIT) + // Number of words of a 434-bit field element + #define NWORDS_FIELD 7 + // Number of "0" digits in the least significant part of p434 + 1 + #define ZERO_WORDS 3 + // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. + #define U64_TO_WORDS(x) UINT64_C(x) +#else + // Number of words of a 434-bit field element + #define NWORDS_FIELD 14 + // Number of "0" digits in the least significant part of p434 + 1 + #define ZERO_WORDS 6 + // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. 
+ #define U64_TO_WORDS(x) \ + (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32) +#endif + +// Extended datatype support +#if !defined(BORINGSSL_HAS_UINT128) + typedef uint64_t uint128_t[2]; +#endif + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo)); + +// If mask |x|==0xff.ff set |x| to 1, otherwise 0 +#define M2B(x) ((x)>>(RADIX-1)) + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ +do { \ + crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = M2B(constant_time_lt_w(tempReg, (crypto_word_t)(carryIn)) | \ + constant_time_lt_w((sumOut), tempReg)); \ +} while(0) + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ +do { \ + crypto_word_t tempReg = (minuend) - (subtrahend); \ + crypto_word_t borrowReg = M2B(constant_time_lt_w((minuend), (subtrahend))); \ + borrowReg |= ((borrowIn) & constant_time_is_zero_w(tempReg)); \ + (differenceOut) = tempReg - (crypto_word_t)(borrowIn); \ + (borrowOut) = borrowReg; \ +} while(0) + +/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly, + which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8). + Defines below are used to work around the bug, and provide a way + to initialize f2elem_t and point_proj_t structs. + Bug has been fixed in GCC6 (debian stretch). +*/ +#define F2ELM_INIT {{ {0}, {0} }} +#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }} + +// Datatype for representing 434-bit field elements (448-bit max.) +// Elements over GF(p434) are encoded in 63 octets in little endian format +// (i.e., the least significant octet is located in the lowest memory address). 
+typedef crypto_word_t felm_t[NWORDS_FIELD]; + +// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e. +// Fp2 element = c0 + c1*i in F_{p^2} +// Datatype for representing double-precision 2x434-bit field elements (448-bit max.) +// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are +// encoded as {a, b}, with a in the lowest memory portion. +typedef struct { + felm_t c0; + felm_t c1; +} fp2; + +// Our F_{p^2} element type is a pointer to the struct. +typedef fp2 f2elm_t[1]; + +// Datatype for representing double-precision 2x434-bit +// field elements in contiguous memory. +typedef crypto_word_t dfelm_t[2*NWORDS_FIELD]; + +// Constants used during SIKE computation. +struct params_t { + // Stores a prime + const crypto_word_t prime[NWORDS_FIELD]; + // Stores prime + 1 + const crypto_word_t prime_p1[NWORDS_FIELD]; + // Stores prime * 2 + const crypto_word_t prime_x2[NWORDS_FIELD]; + // Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} + // in GF(prime^2), expressed in Montgomery representation + const crypto_word_t A_gen[6*NWORDS_FIELD]; + // Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} + // in GF(prime^2), expressed in Montgomery representation + const crypto_word_t B_gen[6*NWORDS_FIELD]; + // Montgomery constant mont_R2 = (2^448)^2 mod prime + const crypto_word_t mont_R2[NWORDS_FIELD]; + // Value 'one' in Montgomery representation + const crypto_word_t mont_one[NWORDS_FIELD]; + // Value '6' in Montgomery representation + const crypto_word_t mont_six[NWORDS_FIELD]; + // Fixed parameters for isogeny tree computation + const unsigned int A_strat[A_max-1]; + const unsigned int B_strat[B_max-1]; +}; + +// Point representation in projective XZ Montgomery coordinates. 
+typedef struct { + f2elm_t X; + f2elm_t Z; +} point_proj; +typedef point_proj point_proj_t[1]; + +#endif // UTILS_H_ diff --git a/src/third_party/wycheproof_testvectors/kwp_test.txt b/src/third_party/wycheproof_testvectors/kwp_test.txt new file mode 100644 index 00000000..ef484910 --- /dev/null +++ b/src/third_party/wycheproof_testvectors/kwp_test.txt @@ -0,0 +1,1562 @@ +# Imported from Wycheproof's third_party/wycheproof_testvectors/kwp_test.json. +# This file is generated by convert_wycheproof.go. Do not edit by hand. +# +# Algorithm: KWP +# Generator version: 0.4.12 + +[keySize = 128] + +# tcId = 1 +ct = 8cd63fa6788aa5edfa753fc87d645a672b14107c3b4519e7 +key = 6f67486d1e914419cb43c28509c7c1ea +msg = 8dc0632d92ee0be4f740028410b08270 +result = valid + +# tcId = 2 +ct = e8bac475d1429034b32f9bdeec09a37f9b3704028f1e0270 +key = a0b17172bb296db7f5c869e9a36b5ce3 +msg = 615dd022d607c910f20178cbdf42060f +result = valid + +# tcId = 3 +ct = 4c8bcd601b508ef399f71b841294497a4493c4a0014c0103 +key = 0e49d571c19b5250effd41d94bde39d6 +msg = f25e4de8caca363fd5f29442eb147b55 +result = valid + +# tcId = 4 +# wrapped key is longer than wrapping key +ct = 9e4510cc84c4bd7abab0a8a5d7f1e6ff3e6777ca2dff9be7e223652239fe57d8 +key = e0e12959109103e30ae8b5684a22e662 +msg = dbb0f2bb2be912a20430972d9842ce3fd3b928e573e1ac8e +result = acceptable + +# tcId = 5 +# wrapped key is longer than wrapping key +ct = 8fbf39ae583bd4efa7a3e8f7b86870b34766ae7d8923a8e97b0cd289ad98cacb +key = dd583d9f1059861430ec8b5d8a180e9b +msg = f2e34f356362a31b51d6e02bcd333c9e6170494ca5ff5487 +result = acceptable + +# tcId = 6 +# wrapped key is longer than wrapping key +ct = df2fbe5fa86418edc7b5b04a4aea724aca17e88cedc84ca8b0b0f048e64590cb +key = faf5ccfae42b43cee2c5f0f3177a7c5d +msg = 4e02084833660c463830483b36dab866c64c8cf7429cac3d +result = acceptable + +# tcId = 7 +# wrapped key is longer than wrapping key +ct = 67f8edf57f84ea0a35b35511d67d3f299c9984b2c07d3809c3d7f5f45091f1a8fbb937ed447677f6 +key = 
c2b9d23f2831ddcdeb456853d4014db9 +msg = f4cfea98e58b939cc859554385cf3a6c7f8217f728efb431c964786de8274907 +result = acceptable + +# tcId = 8 +# wrapped key is longer than wrapping key +ct = 60d55a22ba7dbd7d8f317388e01e6be561d15d29f85c566f1259aa7e7dc3d5d30e0ef5f4c6267553 +key = 620a08f320cdedbf7ae551add348d95e +msg = cec34eaf8e67e1ce619ddfc309531c42f16033a7e2cbc4f5eb3a548164e9b291 +result = acceptable + +# tcId = 9 +# wrapped key is longer than wrapping key +ct = d78a8291108f0f2d8be0ec10ec08240bf4d3021f0a5ed7faba0748db73762f34a0504bd373212df2 +key = ed089ac274f8c7cea2415671a94b5e53 +msg = 6065e41df14daeeefacac5daeb7674cdc9c1f686013b797153e80ef215893299 +result = acceptable + +# tcId = 10 +# Round counter overflows 256 +ct = 9341221aca1c647e2afc2bdd9cf4ed6e60058eb0a84cb3fc2daf3a87d9fad0a1f8268b27aaf7201d705e72f7e2240309ad98742094e3f1c99b7faa9ae181b441f5004b8bc93cdd4160d403d0884749a3c379d47c112a45788c05c2106c98f59758d393e04c880691b0e8683a12df7f876e1e1f68b4acbae9cc8310b34d59ccf4617cee72e845df1e0e32e5b4938f2923d55f1bb5156dd8c787401e6ef241ea4073d0a59ddfcd7a53db5d89b480b030cfb9084ea8479b964f090bb612d5251eee9ef8870a45f1e76fd24abdd9b350fe148b15a4cfeb032d57b5743b3548a7ce9eec8e21a31ce832530edfd1cffd9bb37369e6463c6b373ab60d80b0a2677e92e658f7daf2a5234b7312bf2d967cd0bc809e9be2f706ae63bd632fd611f161e48ee19677f3243aa0e91f6651a1cef62feff7a72eedf830bae1dc6d89e55ccb5e6f97889c6266f7d3f2eb0aea6c8c42200febccc5916825368adc87e04e835de06fd7bc2805c219e7f0b6252563f29969b1f30cfa1a8da4b90ae7534fb849d068a7e77de7360f8af173 +key = b6121acad51038e11873aaa7e6c7be06 +msg = 
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +result = acceptable + +# tcId = 11 +# wrapping small key +ct = a65959a600000000 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = +result = acceptable + +# tcId = 12 +# wrapping small key +ct = 09bcbab50b8dd45ad83412e2919030d3 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 4c +result = acceptable + +# tcId = 13 +# wrapping small key +ct = 0cbe852cdce4f0b5333366f446b2b1c5 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = be52 +result = acceptable + +# tcId = 14 +# wrapping small key +ct = a9dc66e03435ab3d4f97ff66f2c911a3 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 2d5244 +result = acceptable + +# tcId = 15 +# wrapping small key +ct = 1b970c8ecb4187447e60e6083da03086 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 6c3d3b4c +result = acceptable + +# tcId = 16 +# wrapping small key +ct = 0344f7b34ab8ef28aaa843f276b0b3d5 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 0412ab3ec6 +result = acceptable + +# tcId = 17 +# wrapping small key +ct = 17356c7148334ca1a24aab7e82a66e18 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 8ae08938929c +result = acceptable + +# tcId = 18 +# wrapping small key +ct = 1db7510a55591a455d9f8167e6db3c88 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 7c8dfbb68d72af +result = acceptable + +# tcId 
= 19 +# wrapping small key +ct = 936fe58b629ea6ec158145218f2361c7 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 536f8f83b64771c1 +result = acceptable + +# tcId = 20 +# wrapping small key +ct = 6787816804b3127d0ca4073f1dba5c4d3db1ec9c227e6556 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 8571f282b18b64ec5e +result = acceptable + +# tcId = 21 +# wrapping small key +ct = 34131c3bfcc48af15eea8672e52927b462f81d5ba0e6260f +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 8ada889862813e364c4d +result = acceptable + +# tcId = 22 +# wrapping small key +ct = 4d1ec9287cd4dd378b9aefee79d4ed35bcb98ad9fa9fe529 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = f9c56e8058758a5c7c2baa +result = acceptable + +# tcId = 23 +# wrapping small key +ct = 7209f5b6bd5d4916f4995d280e9aa89edd5e96e3c9283ad2 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 7c7dbc83fa62206a521ed4ad +result = acceptable + +# tcId = 24 +# wrapping small key +ct = d85a1efc6ab3a40948f723d9810a5deb019b3ce0208a0d94 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = a6614daf00df6d14f50388bad5 +result = acceptable + +# tcId = 25 +# wrapping small key +ct = 43509b5df3688b6e44c1a994592f4c03da34712f886e63d5 +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 450580a47d7008321496bfb82f48 +result = acceptable + +# tcId = 26 +# wrapping small key +ct = 16e369351c40f220d3fb1197f35da652a3a40ca3b1e99bfb +key = 1abf4b7fa2bb62a78f09ddab04625dca +msg = 9efd21e13855eea8907afdcd8935f4 +result = acceptable + +# tcId = 27 +# Modified IV +ct = 82af032f5389caa503147d2825336eab84816fb6f8ae6df4 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 28 +# Modified IV +ct = 4e00a9eeef87eb6d7be4ec46204d94006c216d5177d2a83c +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 29 +# Modified IV +ct = d3dc6c3b4707a08039d621879caf419b9895482fff7bdcd0 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 30 +# Modified IV +ct = 
09d3bfc3c9c5af2b2951b06406f7ea4d84e9c37402637e2c +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 31 +# Modified IV +ct = 3396679a4d87caf7ce7eb4707ba1c6526728f5a973191713 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 32 +# Modified IV +ct = ec637d90d945e92929c1c873d9aa9c47bc7b172237319d15 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 33 +# Modified IV +ct = 748f373d48d8590e2216b294b9ef94860dbb6b0b0ab625c5 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 34 +# Modified IV +ct = 61d7c584197f257caf2583e444896f1d3ba12509b1ef725b +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 35 +# Modified IV +ct = 7f8cda973fe58b484b120fc710b520c5636057629795f89a +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 36 +# Modified IV +ct = ccea198029edb9d848d6ca76667b666b1dbebd1e4b1faa8d +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 37 +# Modified IV +ct = ee08cb9d20a98b88b2d8f0e39acf34219d105dc14afbe364 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 38 +# Modified IV +ct = 6782992bf8cff068cf41341dd2ca04adedea92e846f74411 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 39 +# Modified IV +ct = 7ed35d0c08042dd56bb5df78056ecd21b8c797d36f57aaec +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 40 +# Modified IV +ct = 37e3b4cefee648766a8efe73d6af12812eded603ab7141bb +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 41 +# Modified IV +ct = fe73777d8992e07eef0d053ad5ec0bf8243fc7e0bc2b405b +key = 
4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 42 +# Modified IV +ct = 39292c91b6b826d47d502043c3ba4f41e2ce32960a0291b5 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 43 +# Modified IV +ct = 36ef8fc13d0f1f5745e3939877b62b8ecba2f5f0b19f9e90 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 44 +# Modified IV +ct = 7255c4eacb4105a68095e9e5b5a4bd8f9623a0da5c6fc230 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 45 +# Modified IV +ct = ea26eec89a46ff1a628834c7247a8e4e45d8a8d3229e26cc +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 46 +# Modified IV +ct = 508593fa85a8effd27c8a225981978fcec6e992eb488c9c2 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 47 +# Modified IV +ct = b8a4cb22f15529864d4ced8e8abae69752a9045a084dfc3f +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 48 +# Modified IV +ct = a0a6bf5e47e89706932b1057b680c3c81dc4d9d0b4f9153b +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 49 +# Modified IV +ct = 11f3af4ed30e77520517c880f1d0c272a89a968dc697cb5a +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 50 +# Modified IV +ct = 6fc912a0bda73bacfa93db4002f18f349fa30f22f7a95ab9 +key = 4f710eb6b5e28703becfc3dc52fa8bc1 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 51 +# RFC 3349 padding +ct = 3731038571c35f7dcc55e48892de353e54c079b89774bbfd +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 000102030405060708090a0b0c0d0e0f +result = invalid + +# tcId = 52 +# Invalid encryption +ct = d85c6bfd092df1aeae5a548e47aa7681 
+key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0001020304050607 +result = invalid + +# tcId = 53 +# padding too long +ct = 7a92427387f5587ee825d1ffa011c40286844ecdadce31cd9678338694ea2682 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 54 +# padding too long +ct = a437d354606ae752894feb62c8def7d17046d8e47f9aed755fba48b3a3009e3ff67d34e26a779064 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0000000000000000000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 55 +# incorrectly encoded length +ct = e8d240d64f16d1522ae2ded42ced257dfec158ff2fe1467d +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 56 +# length = 2**32-1 +ct = 6d1bfda356b7b954e7aaccc6df953322f75be95947b02b30 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 57 +# length = 2**32-1 +ct = 17dbf878ef4076cfcaba5f81d7b123d7 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0000000000000000 +result = invalid + +# tcId = 58 +# length = 2**31-1 +ct = 75c23e253478037802fae0f86af9c78d4e4d9be0c3bff89f +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 59 +# length = 2**31 + 16 +ct = 55717658c6a35e15ee36c66cce91083b63091f51525c0b51 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 60 +# data is incorrectly padded +ct = 8ede88a52ccb8a6d617456955a9f04c94d87696125ded87eebe3e97e185496d9 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = ffffffffffffffffffffffffffffffffffffffffffffffff +result = invalid + +# tcId = 61 +# data is incorrectly padded +ct = 5b4a8f1abffa51676ac8b5ddf9366c12 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0001020304050607 +result = invalid + +# tcId = 62 +# length = 0 +ct = 205cc6dd9592da0ebff6b4b48a0c450eeaeb11a60d33f387 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg 
= 00000000000000000000000000000000 +result = invalid + +# tcId = 63 +# RFC 3349 padding with incorrect size +ct = 908a68b0d2054e199220d37c34a2e136 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0001020304050607 +result = invalid + +# tcId = 64 +# length = 9 +ct = f84bdb15045cee3a8a0f3ed2f07c1771 +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0000000000000000 +result = invalid + +# tcId = 65 +# length = 16 +ct = 7592b1ee6ee92c9467db366adcfa65bb +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0000000000000000 +result = invalid + +# tcId = 66 +# length = 2**31 + 8 +ct = db93a1db3b5babc80a304d527682c1ef +key = 48a53c11ef2d727db7eb9a834b134ea9 +msg = 0000000000000000 +result = invalid + +[keySize = 192] + +# tcId = 67 +ct = 5c117a678223cfe5ee691503061e7ab1e5f720e005171b32 +key = f75a2f49a630c7dc91626b00ce029f0bd2981d7c74a93ebe +msg = 9adbc00c710b1101bdf6a4ed65b32d72 +result = valid + +# tcId = 68 +ct = 6a7f9e03b6f379c56da3a56d8f32eba515454a91fd417449 +key = b713f6b7814f98894d7b153974684359f1460213eb74be68 +msg = 78585f0c49922e82caf17ebc3721b4db +result = valid + +# tcId = 69 +ct = 764097f5ee8236bc0d93bbcea139a652f4b211cc33a61ac9 +key = 13ecf423211caa334ba6db37259a535c20de8ad10fc8c432 +msg = 4fc75d0f221e22408a37e11265d49a05 +result = valid + +# tcId = 70 +ct = 04b83ec803a75bbcb2f87fc6f488a4ccc1827b412483070eed195b6f0048ccbe +key = 4417fbbea51bdd91818d74051957dd70e135c5cf3732bdf1 +msg = f5357da9f8fd4a1190f36e9fa09a90fcf14d87d62332f1a5 +result = valid + +# tcId = 71 +ct = 46ab71f032cb1ccbcc7447a5183574268c0167a26a93fe8422bf284417aa93ea +key = b3f26d8a22fdd61f709841231fbde695b3f28dddced6d41e +msg = 0d0af955d2e3829cc3d643219b301e64e0510dfbc428119a +result = valid + +# tcId = 72 +ct = 47ca298ee47b1b755a499129347e11e7a25754ccb6c2689e8eff270e98c81d18 +key = f70cfb262c729a18206c8afd74356ec7e049d10b44a6e000 +msg = 241cedfa64c4e7bec541a2eb4c368269e0f0ddebc58267ea +result = valid + +# tcId = 73 +# wrapped key is longer than wrapping key +ct = 
ecac4c91758e1ae7bb010c34f4c5f99a3d728b9fa92cb778d3fe80d777a20d3de85ef46e7a0c6a6a +key = 1639f9f81e53e2eeb677a249e5eced3af108971301601a7b +msg = ec3c6a1f1a9585327fe658490c74635e5300876da5846a629398984fb551d691 +result = acceptable + +# tcId = 74 +# wrapped key is longer than wrapping key +ct = 39b7326a44eaed08bffbd4aeaf3e2c3f899c1fd049384ed7b3eb92b788c6449acd6385f0bb18cf28 +key = 1f22d5658aa685b8ba8659dc342880d5b2399e6a815005b0 +msg = 50be4c1b2f29a63f44d7fc63737f600f0194ea3fb36e173d2ddd19f218656380 +result = acceptable + +# tcId = 75 +# wrapped key is longer than wrapping key +ct = 3d2e9f39c7b13e9585227c4344fbe596f92b002456616f137deacc6a8c941649ce294bb2695c1807 +key = 3a2f4aa50441954bba5a1836294ce071f9296b23dbed6771 +msg = 65da02ff21b483a1e39575490b4319e84ae0299f1f00b3859fbe2e74b3ec2aaf +result = acceptable + +# tcId = 76 +# Round counter overflows 256 +ct = d6aacfb52c26baae78c2f54259a4e4168f817064344e2ba8fbfa7fae9f1fd69bd5bc5c1e20a6101b4a7119cbce028e25a9e93d29ee260c4e609baedee788411c2afe60218ce1b0d28b9c29b941251fdcbac3009d59040a0337b8b4a3a020c6d8f310cba63db046d8f36b64c9092e75cee463fc7692ef56bed395c4579da0ecb02129e45ad8a7f116aac6170204888e40693f017a6a0a7dd3962004e60db3a9b6c8b7614a467ccb799bce1ba83f5c0921f1e52bb3909bc0486ec0eaea736498f3ba520a519c3ddf491307958620b737613417b15b438b80b43189baa455031f5771502002ea170c767b33d247feebce62e606f2262537f85f18d1951cc75cedef291c6a501cb1778586249b58156eb8d7283a3f508ee8bcc1206d77bbd6892fe74b865bfc02a8f07223087a6c1e50a41b7cf5f6ee04bd07766b2e5b34c4a7666b0ce06f670e6434a59fb74e0df36c91d94e5e8b721e53e09b6f6504c5d515492a373fcc348a63122cc6e4716e0e1a543d038c6f7731199f691780a8a655cca6718e3dc56e815b3669 +key = b6121acad51038e11873aaa7e6c7be06f93826b74fec0ea1 +msg = 
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +result = acceptable + +# tcId = 77 +# wrapping small key +ct = a65959a600000000 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = +result = acceptable + +# tcId = 78 +# wrapping small key +ct = 52c7f388d0d4237afaa29f2b94723475 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = a3 +result = acceptable + +# tcId = 79 +# wrapping small key +ct = 833431ce8799be69b36aafe3f38d9dac +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 594b +result = acceptable + +# tcId = 80 +# wrapping small key +ct = 31674f46b989f6ead582c70dedc8c6b9 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 72ab34 +result = acceptable + +# tcId = 81 +# wrapping small key +ct = 80535172d2a498aa31601d70fdca9dea +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = d4d9460f +result = acceptable + +# tcId = 82 +# wrapping small key +ct = 56232300dd7b2a71d2328b6df47af8e3 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 643972e552 +result = acceptable + +# tcId = 83 +# wrapping small key +ct = e27e08efe39adbbad8d300b87be2c258 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = f3cdb73d2561 +result = acceptable + +# tcId = 84 +# wrapping small key +ct = 
8f90942cdab33e58b24a23ad7efb7538 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 7b0b53b6429e14 +result = acceptable + +# tcId = 85 +# wrapping small key +ct = 0ebaf23c858015d3bda5b8d908db6049 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 6b2393773e6d1378 +result = acceptable + +# tcId = 86 +# wrapping small key +ct = d56f89977b8eff511158edad6b993007189e5a4b8c0e2faf +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 2c52d6639e769960e8 +result = acceptable + +# tcId = 87 +# wrapping small key +ct = dd889475a76733849f59bed49a15d4315bdb5ba00dc63470 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 707c9356216d69c69048 +result = acceptable + +# tcId = 88 +# wrapping small key +ct = 1a9b3369239b0f40a8dc5bd8d965caf7431445799337b99b +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 615f6fa79e1847e7359a8a +result = acceptable + +# tcId = 89 +# wrapping small key +ct = 5232f8f6679a17d3303b0bd72b06b56b5089e80372dc295b +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 7f5e999168ec60624426cbb1 +result = acceptable + +# tcId = 90 +# wrapping small key +ct = e5544361c60980f3d38f2d8820a150f48f49ef3f9184b29f +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 3f93aaf4463775baf6c0c975ae +result = acceptable + +# tcId = 91 +# wrapping small key +ct = 55396065905915ec914b8d1efbf471e37d283fc2c1496b49 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = fefcf10c976309b2beb085771e50 +result = acceptable + +# tcId = 92 +# wrapping small key +ct = d90376be302a24c541bd6d96094f0025e3d73888391b4306 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b +msg = 6854354d0099f7eff740b0587140b3 +result = acceptable + +# tcId = 93 +# Modified IV +ct = eee27510be39cc88379459420f3773642a423ac1ff0cfb84 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 94 +# Modified IV +ct = 765df3fa1aca6f13268ba79f8659807049a313a0308b643e +key = 
4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 95 +# Modified IV +ct = 71346c17a2718cb7c357e3af2b2d0c3e29b7e02317926746 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 96 +# Modified IV +ct = 55fd49ba081fdf72896068c5a968e2b3c4a473786a2e12c2 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 97 +# Modified IV +ct = 133c66fcbf0e9d5139eff3fcb494b672d72bb622d7015c4b +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 98 +# Modified IV +ct = 8439244f27470e5f1f294cfa22ef5412675d7fbbd92ff016 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 99 +# Modified IV +ct = 4265bdb7d8ea30d9a51e5f48b7ac5487e0c95f154ea8baeb +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 100 +# Modified IV +ct = 31afcca8ff2b8806408c3460181ee5a96bbaf51d133211be +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 101 +# Modified IV +ct = 196f2a6eccb5368fe6a3f2fa0874d8fc9b3b52484e2d6351 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 102 +# Modified IV +ct = 8dc73d363fdb32f6e0ff830c2a48db5815f66d0922694c74 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 103 +# Modified IV +ct = e90022b9da998b4a30c91c1bd1a1f8ca05a52432867e5e78 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 104 +# Modified IV +ct = c9898a1b70bd718df45f1f3eca82eab1eaddb8ed7f2380dc +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + 
+# tcId = 105 +# Modified IV +ct = cc3f2cd6476eddbbfdc801b61174301688554f3db54c2903 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 106 +# Modified IV +ct = 23e15705e7b00d82bd052f0e0135ab7ac0dcce471ff2f1a7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 107 +# Modified IV +ct = b8e2862c0f9eae4f44ad99496e3ed62b3b9c4ce7ab5afb74 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 108 +# Modified IV +ct = 66d8a7769d81421efda456992f6c26cb17665fe080b0160e +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 109 +# Modified IV +ct = a6a28bceb91551a395369ff09370658cc92b092855f417aa +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 110 +# Modified IV +ct = 03ff601cf12b432078a2185590fb5d01e3441cf084bcb04a +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 111 +# Modified IV +ct = e250d358d16d9fd20ad80a99656509229dca391aad3798f0 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 112 +# Modified IV +ct = 643a17860b116ec74089bc574685a6328a3d7a07cd18b520 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 113 +# Modified IV +ct = 3e86e8128904f753c0f3fe3401ba36672966567725c4726c +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 114 +# Modified IV +ct = d4b8cc849176b8344b0849490143d3512915171bd7d5759e +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId 
= 115 +# Modified IV +ct = f84e0e6ff64e0b27b8b59b5b77c223023f0fea95433864ec +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 116 +# Modified IV +ct = 4030b4b0e9c1b1ce8e52f6bdb48088e65b05844307989c8b +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 117 +# Modified IV +ct = ccb3b36c26b2d901b7f0765362d992b2d5089c2a7559b195becbe173780352fa +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 118 +# Modified IV +ct = 4e5fc8dccaeec9b1c8a606a2bd7d7201eede62b9c2e939a5aba663a6a040e361 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 119 +# Modified IV +ct = af21f5e7f15a63c8ea6001cf024f281e7f44aedd68954564fc2bd146e96d793a +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 120 +# Modified IV +ct = 3a4f571ffbf761d3f7d413172ee1e4ae2862baacfd5ab66dc685b9af8b70b538 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 121 +# Modified IV +ct = 273de386d5fef497f9487afd54c1c0fae8aacabf2af465caf352e2300d29266b +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 122 +# Modified IV +ct = 16511743dc44199cee1dbf5045141b075f01ee13326c9faf2c74b7c99791830f +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 123 +# Modified IV +ct = 370f92db00f7fc8a0e654318a5b3ff89a604034f421339201d79e0ec4d6088de +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 
+result = invalid + +# tcId = 124 +# Modified IV +ct = e3edd0e84832f3615f6deefb444de3b9ec527741686029db91de0bb9b2a5c05d +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 125 +# RFC 3349 padding +ct = 36ee480138edf11e144efcddd24d2c121749da6e4eab17fe +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 000102030405060708090a0b0c0d0e0f +result = invalid + +# tcId = 126 +# Invalid encryption +ct = 166beb49e97a4a9cc7b0ccf441ec15b5 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0001020304050607 +result = invalid + +# tcId = 127 +# padding too long +ct = 74ff3070a0a08471c001febb95a890f35159a9fe263719e40c2332ce5c58fada +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 128 +# padding too long +ct = 4f0b38eb328d1227b1e17c103a44a373ff67cee953c59eea26117947b5d3ef8932c8858b4f9fb47c +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0000000000000000000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 129 +# incorrectly encoded length +ct = 775dcabab9e4be8fd9963a4dc7a1447ef82888403882bdb6 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 130 +# length = 2**32-1 +ct = 669803237fa10eabb4d2c6ad85bd9f7df5f4a33340eb0ce9 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 131 +# length = 2**32-1 +ct = c788504d786f5c21b6671bf190657301 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0000000000000000 +result = invalid + +# tcId = 132 +# length = 2**31-1 +ct = d079f60d3258f5e695d1a73db008ef38516b713eca2c0eaf +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 133 +# length = 2**31 + 16 +ct = 
f1ae4b8865013b0fc63b463e664cec3c6031f61f2de82f43 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 134 +# data is incorrectly padded +ct = 8874e1b6e15e3ef6c461411a5f5ad0c8b05368cd5b3ee39b2b413d18a4eebfc9 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = ffffffffffffffffffffffffffffffffffffffffffffffff +result = invalid + +# tcId = 135 +# data is incorrectly padded +ct = 890a3dab8439bb73b14c6e99c34f0b0e +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0001020304050607 +result = invalid + +# tcId = 136 +# length = 0 +ct = d4f633aedeb89e349a98738b00ee42c90d583b16e986e49f +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 137 +# RFC 3349 padding with incorrect size +ct = b8b2a5b1d3280dcb4daeeed43f36509b +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0001020304050607 +result = invalid + +# tcId = 138 +# length = 9 +ct = 4429cf64251d8a54a9d1389c01c30900 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0000000000000000 +result = invalid + +# tcId = 139 +# length = 16 +ct = e5634eca10372c867c7f91ee813ec3f3 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0000000000000000 +result = invalid + +# tcId = 140 +# length = 2**31 + 8 +ct = 9e517d4d0142e1544ba1e7419a696c21 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e +msg = 0000000000000000 +result = invalid + +# tcId = 141 +# RFC 3394 +ct = 138bdeaa9b8fa7fc61f97742e72248ee5ae6ae5360d1ae6a5f54f373fa543b6a +key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8 +msg = c37b7e6492584340bed12207808941155068f738 +result = valid + +# tcId = 142 +# RFC 3394 +ct = afbeb0f07dfbf5419200f2ccb50bb24f +key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8 +msg = 466f7250617369 +result = valid + +[keySize = 256] + +# tcId = 143 +ct = e3eab96d9a2fda12f9e252053aff15e753e5ea6f5172c92b +key = 
fce0429c610658ef8e7cfb0154c51de2239a8a317f5af5b6714f985fb5c4d75c +msg = 287326b5ed0078e7ca0164d748f667e7 +result = valid + +# tcId = 144 +ct = 9d2b42fb2fdb92c89fb0c3bcd9e1600d3334b4e35e791369 +key = 0dda6da5123e2c37c6fa16ba0d334cd01acd652f8994211751dfab4faac2fc22 +msg = b40b6828729b456322a8d065abc0d081 +result = valid + +# tcId = 145 +ct = 5291e05abd55f5886850855e3f9f2f576b101acc222d6766 +key = d6925914cd06308f81ad91e23073593d99d4e50351b20eb2a8d1a1ac4ced6588 +msg = 037b27b3dc95b19d15bd4091e320bfe1 +result = valid + +# tcId = 146 +ct = 4b1220525c537aec30ebcd562b694b4e9e2ccd819de22ef608b5d8090779d9de +key = 07518a82cbc8da1dcec55f3763a206d277487abd03cedd0b8bef9ee2fb157121 +msg = faa4664d79fce3c7d2fdd462f6c1c423c2f8e6b69be2e071 +result = valid + +# tcId = 147 +ct = 67b2cbd68f6a208d647bdc5af7d0bccf6711a9e8fd0d9434363006addd4b9696 +key = ea46991d4e71f53dd624e7fe7fde11944a7c5942d232369b8065d42b8cd2dde1 +msg = dffc5cf1dd5411d015d84601fa38df5effe885c7f26a4825 +result = valid + +# tcId = 148 +ct = cfdbbd95f187508a488fe017c5e5d5a5975b68441d520e0e931922388e28784c +key = fdcfa902c6f222f527af84da533b14b52e2615da3a89d1d35708b0cd49f60d87 +msg = 966b07047354966a703e79607b556032f4f596b7f9206f05 +result = valid + +# tcId = 149 +ct = b63b7e0fec7e315816233db6758fd3e744b9f6a40862bdf866487e53bcb950d8b2649269e51b4475 +key = 38e1b1d075d9d852b9a6c01c8ff6965af01bac457a4e339ae3e1d7b2ffacc0cd +msg = 80ad6820f1c90981e2ca42b817a345c1179d0a11d8e23a8adc0505e13d87295a +result = valid + +# tcId = 150 +ct = 837cfc316b49299edaf427e0988020ee876204b29d847669daab72c8660b0d860e9de3bd851198ff +key = c641f1689d81caa8ba37d895272240664054ed974cfffc40e6c5c0cad1b916c7 +msg = 3fd0ba19955e46749f54d88e99d080b7339d588fe612ec0f4021ca3ca2104270 +result = valid + +# tcId = 151 +ct = 0e9e2e9aa34bbf973d67bc534ac86fc5b5a5f9da5f026866177894ec6077a5c84501510e1bf4afb3 +key = aa0ab9d68ed4a04e723f81b44c0c88d0bcde7a80cfd476eb4b8836d9aa01ec4c +msg = 57faa8766f6d6a0aa1cf643f857c150df5b31303b50af480e21c4b5e8c8a15d5 +result = 
valid + +# tcId = 152 +# Round counter overflows 256 +ct = 1c6b7e4003384f071bf29baea9098ad81da8e9862909329f52793b35d592c10dba15aa89400ea6403df8dcaffd0dbf5606303f109f79ad700ed5d5ad4e59950ce9ce5296c9d186a0df441973d1835f9ac000ad1a6797875c3a03161e9e3f5ea464032e407854eadca5a9e7a386bb0d29253e3804adefd8c0402cc8c40ac7f9041429cc0bb77a405b284baa2dae764ea09c654c0a82f2c5724221ba44e341503d3103dbc393c7702182f8cc2762ddbc873b7f84197709886a4b5df5b04ff9d21b79b50904af3c32128dfb9cde94fe1254d981e6ce3acfda82db1fa2badbccd2d29052a04a69ce1f5652f30496ea57edc7e3e885dd4a35ca15aba602bb4c888a8064da94c2ac5c12c11f608810af46fbb49c3e8f8771ff661f8d8dccd163d0c4a401b8b9aa74e68a56011cf78d21dc7541a974f9dad5ae27f8a26d1b0e76be2f86c6a21e9d1c2b5df3c8878a8bcae143b3af1f082afc52616eeadd2232926597b245d394931e02e493b0bc27a92d013e111694cac2c5a2a46e008a8498b5c31bb5ec35a4e9957e365d +key = b6121acad51038e11873aaa7e6c7be06f93826b74fec0ea1c02f9981ed49d16a +msg = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +result = acceptable + +# tcId = 153 +# wrapping small key +ct = a65959a600000000 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = +result = acceptable + +# tcId = 154 +# wrapping small key +ct = 06c1e65ac0f385b4e8c400d229f39422 +key = 
1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = ae +result = acceptable + +# tcId = 155 +# wrapping small key +ct = c98da5936a1313eba1a6773b8060ea5e +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = c548 +result = acceptable + +# tcId = 156 +# wrapping small key +ct = b2a77d9b837e87cdb7391e1df7cdaf14 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = f713b9 +result = acceptable + +# tcId = 157 +# wrapping small key +ct = d8ecf20191f75aa36686298bfa5022ab +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = f375cbf7 +result = acceptable + +# tcId = 158 +# wrapping small key +ct = 077362f50356fc7c54c70f9cb4306f7d +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = d9445094b1 +result = acceptable + +# tcId = 159 +# wrapping small key +ct = a4bd6a116ad88a52aae3f0c0cb893f9b +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = fab43e91ae15 +result = acceptable + +# tcId = 160 +# wrapping small key +ct = 68a52de00ec0f1ebbedc38fee6be0c23 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = 90735025797bd2 +result = acceptable + +# tcId = 161 +# wrapping small key +ct = 3a6746052a1744cfe7e2f36dafc4042d +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = e43f5e4e123a03c4 +result = acceptable + +# tcId = 162 +# wrapping small key +ct = db7e73da22219e1baac0f4e955c3db2b900b5d3078f94b59 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = 1723eb9d000916996a +result = acceptable + +# tcId = 163 +# wrapping small key +ct = f77ec14a010777f1f1071808f285c1c00b4e9420f0e8bf48 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = 8b18daecde14b8472ffd +result = acceptable + +# tcId = 164 +# wrapping small key +ct = 6b40d4f0863581a7d0365ad477568bfad94f8bf134984838 +key = 
1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = e5bd6fbacbf3ef0d40c884 +result = acceptable + +# tcId = 165 +# wrapping small key +ct = 660f645b02405a18f7225b68c0a09a949b2b5ba784922cfe +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = b3be5e5397df5f46b099e821 +result = acceptable + +# tcId = 166 +# wrapping small key +ct = 6bea6bf57601bf063873f47ec3572cfb9cfb595d8bdb5e97 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = 4cdd960cabcf8aaf69c37da1d3 +result = acceptable + +# tcId = 167 +# wrapping small key +ct = b631292536aaf02d829cc6d3c39e5a5cd76240889e9d51d0 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = da29e0889cf98742612e0326300b +result = acceptable + +# tcId = 168 +# wrapping small key +ct = de497acf18a177a3a9b3d8da46d74dfa58dcc537a3a95323 +key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093 +msg = 72aaee126a822184806c7d22eed66b +result = acceptable + +# tcId = 169 +# Modified IV +ct = aef4d2357a8fc5c3b4a80a15ed49781d3a82c98eb78c9180 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 170 +# Modified IV +ct = 6eaefd5193f0725fea545077a430860663901979f0b6f4a3 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 171 +# Modified IV +ct = f9ded536c1ae9c680f7d9c4b91a566a07b1628e9b9f4fccd +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 172 +# Modified IV +ct = 443526477c779a329ded0b230307afa64fdc10dfc86414dd +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 173 +# Modified IV +ct = 21ba79f3b423a66e7baad86fe49786e07a33dfdf227687e9 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result 
= invalid + +# tcId = 174 +# Modified IV +ct = 3e65dbacaae556fa18bd192035cd55958adeac30e5ca7b3b +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 175 +# Modified IV +ct = de2054883b00f81ff68e42b7ff1c05ef5faaf75b2bb14004 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 176 +# Modified IV +ct = 2aa3c6ba891d1211677d59f886cc6d05698243d10dc189f7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae37 +result = invalid + +# tcId = 177 +# Modified IV +ct = 9b1e7d6caf42bb3a15530f2387ed7329310ba76e1852566a +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 178 +# Modified IV +ct = daf6a9f5e4b4985fcd4815bf6298a3039bcb32327b0876ff +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 179 +# Modified IV +ct = 14c4079399721142fd5fce26e9417064c7e0201fb7b5255c +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 180 +# Modified IV +ct = f48a30b8691a2a80dd79c355c281addf779bfed8971e3ce4 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 181 +# Modified IV +ct = 248f867430ffc954b494c936a3ef815b1754009928aaf0c4 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 182 +# Modified IV +ct = dcaa88dad9b03e59a3ac8350239824368004e2ca616c15d7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 183 +# Modified IV +ct = a6cc8470192687ec9a31258ddb73084005784475f3442705 +key = 
4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 184 +# Modified IV +ct = 0527ab5408b4f1484b27f98641511143ab88783688256815 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070 +result = invalid + +# tcId = 185 +# Modified IV +ct = 7fd3ad3aee0545da1ed3a54d5a198a2c76cf8290c011c042 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 186 +# Modified IV +ct = a24e94c12b2e6b776c8febe9179521beae0cfbd507d358b4 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 187 +# Modified IV +ct = 9395b071fa3d9908b2e1b349bf7cd6a1cfc86b979c8c73cd +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 188 +# Modified IV +ct = 1eb452770bc0f26a3576b604bf5ac72f714fc468c357eba7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 189 +# Modified IV +ct = b42bcb4161f40b30f3d2f740f43e441d3c9a39613914f1c6 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 190 +# Modified IV +ct = f3d76dd320e5f1b3f85b8f73a9ebcfabfb8346daafaf36e6 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 191 +# Modified IV +ct = b8e26164496942f44f16751096fb47952ec478bb288e72a1 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 192 +# Modified IV +ct = fa783b3aca0ec1e677378f23ebe937776fa590ecc6b01392 +key = 
4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5 +result = invalid + +# tcId = 193 +# Modified IV +ct = 8b011408049eab81cc185796b9636982c1ad28e940e5c35ab1219434c23e8c59 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 194 +# Modified IV +ct = 08db2f06aa2400d4cc1113b1c9e3ba1b39e3e26a84918f9266796c426c166428 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 195 +# Modified IV +ct = 3114404be000ee167b65dd3cfae3b10c50dffe1df864b5e52a2805f0c80021c0 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 196 +# Modified IV +ct = 405ae5bdeff8b05d28ea55900b8e81dc789d532ec3fc457730819e762172f751 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 197 +# Modified IV +ct = 7c19e66d21c0f1409ee6f03a36ab6ba532349e2567200b95d7f5012b2b7e5d33 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 198 +# Modified IV +ct = 955ac67d6e496b9b93a4dda8f6e65e668f1326b256ee146a7647ba18deee7986 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 199 +# Modified IV +ct = c8600aa18be27279493fd68c84130c8bc328b0f6821e01e892b6c2dc1c005270 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 200 +# Modified IV +ct = 492566e0dc539e234b08b95fb23594a6d14f59fa4367799495c2e7f2993135ec +key = 
4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1 +result = invalid + +# tcId = 201 +# Modified IV +ct = 8c5c2ea18125a03d15d2a624c9bfcccdf53709a89ae03d5728c98943b13df72c6f02fc8e1cfcdfa7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 202 +# Modified IV +ct = 8836c5cb2eec2ca2541b18c1259933ebd601bd6763d9f7cebf06ed6abbe37d455aca13a2db87d111 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 203 +# Modified IV +ct = 2554e0faf721d77f7dfadaaa90b70c2f242f93bdc4f876cd058a86ccfff33f8fd88736997f505d98 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 204 +# Modified IV +ct = 53ee4c8f03212b389f5bc2b26bc898deb91a457f258a22028a688919e12c4da23090c26b5c9ff692 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 205 +# Modified IV +ct = 3a63b0283ec071a4d4c32b0f30b384eccb3cd8d7fb12de6806e12fef5da82a7a39aad8128c3e5915 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 206 +# Modified IV +ct = d1cfaaa9adc25f948c0c4720967b01488e06d3dfc5622b5de38a722798d4a3a44fa6194a92c5ede7 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 207 +# Modified IV +ct = 251a71511a4e73d1469a051fd88fa78cae96547fd8ca8e323b05d8717cdcd239292c7bbe0708fae5 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 208 +# Modified IV +ct = 14a62f7284124d795826cc89852e97dbe6b8a30ac56df07173878cf0136dbe386ec46327d6fc65f1 +key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7 +msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690 +result = invalid + +# tcId = 209 +# RFC 3349 padding +ct = ac1a774a5de27e4f9c356e4f62deaf8b7eeee6bcafafd895 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 000102030405060708090a0b0c0d0e0f +result = invalid + +# tcId = 210 +# Invalid encryption +ct = b3941437f55e7cbc3f88050aff703967 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0001020304050607 +result = invalid + +# tcId = 211 +# padding too long +ct = 86175acf19ad0b7ac60d1fe4bb7850635e7ec6f8a314f85b6dd3d8f9349ea38d +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 212 +# padding too long +ct = 791f088847a76731e0d56b9b2dcb28bf9f091a9725790e0a64fc8e7cb3ad50f380297a98e3b1c33e +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0000000000000000000000000000000000000000000000000000000000000000 +result = invalid + +# tcId = 213 +# incorrectly encoded length +ct = 868c34495bd3d7b4e2c1861e7fcbbdb372099488dd96c9ea +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 214 +# length = 2**32-1 +ct = 4a8b4aeaa713469bfd9bf88d4072379fc858e40b24b0bebe +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 215 +# length = 2**32-1 +ct = c210aa3b5fbf5eac97e68d98d7727f38 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0000000000000000 +result = invalid + +# tcId = 216 +# length = 2**31-1 +ct = 
e0ebd376e050cc9027b76dfc38ee2c6ae2808cecf480a560 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 217 +# length = 2**31 + 16 +ct = 23a693e211c08ab9b222c2ede2db18f437e22917fdff8032 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 218 +# data is incorrectly padded +ct = 003f2916fea6827e01199028d3dc4e03889113f97b1860cc242e5a0f28a0f159 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = ffffffffffffffffffffffffffffffffffffffffffffffff +result = invalid + +# tcId = 219 +# data is incorrectly padded +ct = 5c25a170d5225a6d66e117c691b37383 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0001020304050607 +result = invalid + +# tcId = 220 +# length = 0 +ct = df9ef924eb59634be5b27cabd33d72bd6be6e01e4672ab05 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 00000000000000000000000000000000 +result = invalid + +# tcId = 221 +# RFC 3349 padding with incorrect size +ct = e6e66fad359a7b63a977788acd297121 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0001020304050607 +result = invalid + +# tcId = 222 +# length = 9 +ct = 76b88ecda760b1af80703036185fc476 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0000000000000000 +result = invalid + +# tcId = 223 +# length = 16 +ct = fd101943f4ab7c38ec68c75d4b3193dc +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0000000000000000 +result = invalid + +# tcId = 224 +# length = 2**31 + 8 +ct = 1793a3a9bd146726edbcb9589f20e849 +key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2 +msg = 0000000000000000 +result = invalid + |