summaryrefslogtreecommitdiff
path: root/src/third_party
diff options
context:
space:
mode:
authorPete Bentley <prb@google.com>2019-08-09 14:24:27 +0000
committerPete Bentley <prb@google.com>2019-08-09 14:24:27 +0000
commita5c947b7c91bac52eeb5086507b67e52a59ef980 (patch)
tree3725c3e206175c177a448c50d41ad2c2589a07fa /src/third_party
parent228bd6249d17f351ea66508b3ec3112ed1cbdf30 (diff)
downloadboringssl-a5c947b7c91bac52eeb5086507b67e52a59ef980.tar.gz
Revert "Revert "external/boringssl: Sync to 81080a729af568f7b5fde92b9170cc17065027c9.""
This reverts commit 228bd6249d17f351ea66508b3ec3112ed1cbdf30. Reason for revert: All fixes submitted for modules affected by the ENGINE_free API change. Change-Id: I30fafafa13ec0a6390f4a9211fbf3122a8b4865f
Diffstat (limited to 'src/third_party')
-rw-r--r--src/third_party/fiat/curve25519_32.h8
-rw-r--r--src/third_party/fiat/curve25519_64.h8
-rw-r--r--src/third_party/fiat/p256.c233
-rw-r--r--src/third_party/fiat/p256_32.h8
-rw-r--r--src/third_party/fiat/p256_64.h8
-rw-r--r--src/third_party/sike/LICENSE21
-rw-r--r--src/third_party/sike/asm/fp-armv8.pl915
-rwxr-xr-xsrc/third_party/sike/asm/fp-x86_64.pl1626
-rw-r--r--src/third_party/sike/asm/fp_generic.c181
-rw-r--r--src/third_party/sike/curve_params.c128
-rw-r--r--src/third_party/sike/fpx.c283
-rw-r--r--src/third_party/sike/fpx.h113
-rw-r--r--src/third_party/sike/isogeny.c260
-rw-r--r--src/third_party/sike/isogeny.h49
-rw-r--r--src/third_party/sike/sike.c531
-rw-r--r--src/third_party/sike/sike.h64
-rw-r--r--src/third_party/sike/sike_test.cc251
-rw-r--r--src/third_party/sike/utils.h145
-rw-r--r--src/third_party/wycheproof_testvectors/kwp_test.txt1562
19 files changed, 6265 insertions, 129 deletions
diff --git a/src/third_party/fiat/curve25519_32.h b/src/third_party/fiat/curve25519_32.h
index 820a5c9b..53772421 100644
--- a/src/third_party/fiat/curve25519_32.h
+++ b/src/third_party/fiat/curve25519_32.h
@@ -90,7 +90,13 @@ static void fiat_25519_subborrowx_u25(uint32_t* out1, fiat_25519_uint1* out2, fi
static void fiat_25519_cmovznz_u32(uint32_t* out1, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
fiat_25519_uint1 x1 = (!(!arg1));
uint32_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT32_C(0xffffffff));
- uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2));
+ // Note this line has been patched from the synthesized code to add value
+ // barriers.
+ //
+ // Clang recognizes this pattern as a select. While it usually transforms it
+ // to a cmov, it sometimes further transforms it into a branch, which we do
+ // not want.
+ uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2));
*out1 = x3;
}
diff --git a/src/third_party/fiat/curve25519_64.h b/src/third_party/fiat/curve25519_64.h
index 23bf361d..7c31ff99 100644
--- a/src/third_party/fiat/curve25519_64.h
+++ b/src/third_party/fiat/curve25519_64.h
@@ -58,7 +58,13 @@ static void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fi
static void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_25519_uint1 x1 = (!(!arg1));
uint64_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
- uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2));
+ // Note this line has been patched from the synthesized code to add value
+ // barriers.
+ //
+ // Clang recognizes this pattern as a select. While it usually transforms it
+ // to a cmov, it sometimes further transforms it into a branch, which we do
+ // not want.
+ uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
*out1 = x3;
}
diff --git a/src/third_party/fiat/p256.c b/src/third_party/fiat/p256.c
index ebc5de6f..23ec71f9 100644
--- a/src/third_party/fiat/p256.c
+++ b/src/third_party/fiat/p256.c
@@ -321,7 +321,10 @@ static void point_add(fe x3, fe y3, fe z3, const fe x1,
limb_t yneq = fe_nz(r);
- if (!xneq && !yneq && z1nz && z2nz) {
+ limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
+ ~constant_time_is_zero_w(z1nz) &
+ ~constant_time_is_zero_w(z2nz);
+ if (is_nontrivial_double) {
point_double(x3, y3, z3, x1, y1, z1);
return;
}
@@ -731,98 +734,6 @@ static char get_bit(const uint8_t *in, int i) {
return (in[i >> 3] >> (i & 7)) & 1;
}
-// Interleaved point multiplication using precomputed point multiples: The
-// small point multiples 0*P, 1*P, ..., 17*P are in p_pre_comp, the scalar
-// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple
-// of the generator, using certain (large) precomputed multiples in g_pre_comp.
-// Output point (X, Y, Z) is stored in x_out, y_out, z_out.
-static void batch_mul(fe x_out, fe y_out, fe z_out,
- const uint8_t *p_scalar, const uint8_t *g_scalar,
- const fe p_pre_comp[17][3]) {
- // set nq to the point at infinity
- fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3];
- uint64_t bits;
- uint8_t sign, digit;
-
- // Loop over both scalars msb-to-lsb, interleaving additions of multiples
- // of the generator (two in each of the last 32 rounds) and additions of p
- // (every 5th round).
-
- int skip = 1; // save two point operations in the first round
- size_t i = p_scalar != NULL ? 255 : 31;
- for (;;) {
- // double
- if (!skip) {
- point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
- }
-
- // add multiples of the generator
- if (g_scalar != NULL && i <= 31) {
- // first, look 32 bits upwards
- bits = get_bit(g_scalar, i + 224) << 3;
- bits |= get_bit(g_scalar, i + 160) << 2;
- bits |= get_bit(g_scalar, i + 96) << 1;
- bits |= get_bit(g_scalar, i + 32);
- // select the point to add, in constant time
- select_point(bits, 16, g_pre_comp[1], tmp);
-
- if (!skip) {
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
- } else {
- fe_copy(nq[0], tmp[0]);
- fe_copy(nq[1], tmp[1]);
- fe_copy(nq[2], tmp[2]);
- skip = 0;
- }
-
- // second, look at the current position
- bits = get_bit(g_scalar, i + 192) << 3;
- bits |= get_bit(g_scalar, i + 128) << 2;
- bits |= get_bit(g_scalar, i + 64) << 1;
- bits |= get_bit(g_scalar, i);
- // select the point to add, in constant time
- select_point(bits, 16, g_pre_comp[0], tmp);
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
- tmp[1], tmp[2]);
- }
-
- // do other additions every 5 doublings
- if (p_scalar != NULL && i % 5 == 0) {
- bits = get_bit(p_scalar, i + 4) << 5;
- bits |= get_bit(p_scalar, i + 3) << 4;
- bits |= get_bit(p_scalar, i + 2) << 3;
- bits |= get_bit(p_scalar, i + 1) << 2;
- bits |= get_bit(p_scalar, i) << 1;
- bits |= get_bit(p_scalar, i - 1);
- ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
-
- // select the point to add or subtract, in constant time.
- select_point(digit, 17, p_pre_comp, tmp);
- fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point.
- fe_cmovznz(tmp[1], sign, tmp[1], ftmp);
-
- if (!skip) {
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
- } else {
- fe_copy(nq[0], tmp[0]);
- fe_copy(nq[1], tmp[1]);
- fe_copy(nq[2], tmp[2]);
- skip = 0;
- }
- }
-
- if (i == 0) {
- break;
- }
- --i;
- }
- fe_copy(x_out, nq[0]);
- fe_copy(y_out, nq[1]);
- fe_copy(z_out, nq[2]);
-}
-
// OPENSSL EC_METHOD FUNCTIONS
// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
@@ -890,45 +801,116 @@ static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
fe_to_generic(&r->Z, z);
}
-static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar) {
+static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar) {
fe p_pre_comp[17][3];
- fe x_out, y_out, z_out;
+ OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
+ // Precompute multiples.
+ fe_from_generic(p_pre_comp[1][0], &p->X);
+ fe_from_generic(p_pre_comp[1][1], &p->Y);
+ fe_from_generic(p_pre_comp[1][2], &p->Z);
+ for (size_t j = 2; j <= 16; ++j) {
+ if (j & 1) {
+ point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+ p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
+ p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
+ p_pre_comp[j - 1][2]);
+ } else {
+ point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+ p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
+ p_pre_comp[j / 2][2]);
+ }
+ }
+
+ // Set nq to the point at infinity.
+ fe nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3];
+
+ // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round.
+ int skip = 1; // Save two point operations in the first round.
+ for (size_t i = 255; i < 256; i--) {
+ // double
+ if (!skip) {
+ point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
+ }
- if (p != NULL && p_scalar != NULL) {
- // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
- // they contribute nothing to the linear combination.
- OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
- // Precompute multiples.
- fe_from_generic(p_pre_comp[1][0], &p->X);
- fe_from_generic(p_pre_comp[1][1], &p->Y);
- fe_from_generic(p_pre_comp[1][2], &p->Z);
- for (size_t j = 2; j <= 16; ++j) {
- if (j & 1) {
- point_add(p_pre_comp[j][0], p_pre_comp[j][1],
- p_pre_comp[j][2], p_pre_comp[1][0],
- p_pre_comp[1][1], p_pre_comp[1][2],
- 0,
- p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
- p_pre_comp[j - 1][2]);
+ // do other additions every 5 doublings
+ if (i % 5 == 0) {
+ uint64_t bits = get_bit(scalar->bytes, i + 4) << 5;
+ bits |= get_bit(scalar->bytes, i + 3) << 4;
+ bits |= get_bit(scalar->bytes, i + 2) << 3;
+ bits |= get_bit(scalar->bytes, i + 1) << 2;
+ bits |= get_bit(scalar->bytes, i) << 1;
+ bits |= get_bit(scalar->bytes, i - 1);
+ uint8_t sign, digit;
+ ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
+
+ // select the point to add or subtract, in constant time.
+ select_point(digit, 17, (const fe(*)[3])p_pre_comp, tmp);
+ fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point.
+ fe_cmovznz(tmp[1], sign, tmp[1], ftmp);
+
+ if (!skip) {
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
+ tmp[0], tmp[1], tmp[2]);
} else {
- point_double(p_pre_comp[j][0], p_pre_comp[j][1],
- p_pre_comp[j][2], p_pre_comp[j / 2][0],
- p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
+ fe_copy(nq[0], tmp[0]);
+ fe_copy(nq[1], tmp[1]);
+ fe_copy(nq[2], tmp[2]);
+ skip = 0;
}
}
}
- batch_mul(x_out, y_out, z_out,
- (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
- g_scalar != NULL ? g_scalar->bytes : NULL,
- (const fe (*) [3])p_pre_comp);
+ fe_to_generic(&r->X, nq[0]);
+ fe_to_generic(&r->Y, nq[1]);
+ fe_to_generic(&r->Z, nq[2]);
+}
+
+static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
+ EC_RAW_POINT *r,
+ const EC_SCALAR *scalar) {
+ // Set nq to the point at infinity.
+ fe nq[3] = {{0}, {0}, {0}}, tmp[3];
+
+ int skip = 1; // Save two point operations in the first round.
+ for (size_t i = 31; i < 32; i--) {
+ if (!skip) {
+ point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
+ }
+
+ // First, look 32 bits upwards.
+ uint64_t bits = get_bit(scalar->bytes, i + 224) << 3;
+ bits |= get_bit(scalar->bytes, i + 160) << 2;
+ bits |= get_bit(scalar->bytes, i + 96) << 1;
+ bits |= get_bit(scalar->bytes, i + 32);
+ // Select the point to add, in constant time.
+ select_point(bits, 16, g_pre_comp[1], tmp);
+
+ if (!skip) {
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
+ tmp[1], tmp[2]);
+ } else {
+ fe_copy(nq[0], tmp[0]);
+ fe_copy(nq[1], tmp[1]);
+ fe_copy(nq[2], tmp[2]);
+ skip = 0;
+ }
+
+ // Second, look at the current position.
+ bits = get_bit(scalar->bytes, i + 192) << 3;
+ bits |= get_bit(scalar->bytes, i + 128) << 2;
+ bits |= get_bit(scalar->bytes, i + 64) << 1;
+ bits |= get_bit(scalar->bytes, i);
+ // Select the point to add, in constant time.
+ select_point(bits, 16, g_pre_comp[0], tmp);
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
+ tmp[1], tmp[2]);
+ }
- fe_to_generic(&r->X, x_out);
- fe_to_generic(&r->Y, y_out);
- fe_to_generic(&r->Z, z_out);
+ fe_to_generic(&r->X, nq[0]);
+ fe_to_generic(&r->Y, nq[1]);
+ fe_to_generic(&r->Z, nq[2]);
}
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
@@ -1066,7 +1048,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
ec_GFp_nistp256_point_get_affine_coordinates;
out->add = ec_GFp_nistp256_add;
out->dbl = ec_GFp_nistp256_dbl;
- out->mul = ec_GFp_nistp256_points_mul;
+ out->mul = ec_GFp_nistp256_point_mul;
+ out->mul_base = ec_GFp_nistp256_point_mul_base;
out->mul_public = ec_GFp_nistp256_point_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/src/third_party/fiat/p256_32.h b/src/third_party/fiat/p256_32.h
index faaa0b04..638eb5d9 100644
--- a/src/third_party/fiat/p256_32.h
+++ b/src/third_party/fiat/p256_32.h
@@ -77,7 +77,13 @@ static void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, ui
static void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
fiat_p256_uint1 x1 = (!(!arg1));
uint32_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff));
- uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2));
+ // Note this line has been patched from the synthesized code to add value
+ // barriers.
+ //
+ // Clang recognizes this pattern as a select. While it usually transforms it
+ // to a cmov, it sometimes further transforms it into a branch, which we do
+ // not want.
+ uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2));
*out1 = x3;
}
diff --git a/src/third_party/fiat/p256_64.h b/src/third_party/fiat/p256_64.h
index 8e449c6b..7d97e0a0 100644
--- a/src/third_party/fiat/p256_64.h
+++ b/src/third_party/fiat/p256_64.h
@@ -79,7 +79,13 @@ static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, ui
static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_p256_uint1 x1 = (!(!arg1));
uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
- uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2));
+ // Note this line has been patched from the synthesized code to add value
+ // barriers.
+ //
+ // Clang recognizes this pattern as a select. While it usually transforms it
+ // to a cmov, it sometimes further transforms it into a branch, which we do
+ // not want.
+ uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
*out1 = x3;
}
diff --git a/src/third_party/sike/LICENSE b/src/third_party/sike/LICENSE
new file mode 100644
index 00000000..5cf7c8db
--- /dev/null
+++ b/src/third_party/sike/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE
diff --git a/src/third_party/sike/asm/fp-armv8.pl b/src/third_party/sike/asm/fp-armv8.pl
new file mode 100644
index 00000000..ce19d809
--- /dev/null
+++ b/src/third_party/sike/asm/fp-armv8.pl
@@ -0,0 +1,915 @@
+#! /usr/bin/env perl
+#
+# April 2019
+#
+# Abstract: field arithmetic in aarch64 assembly for SIDH/p434
+
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../crypto/perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
+
+$PREFIX="sike";
+
+$code.=<<___;
+.section .rodata
+
+# p434 x 2
+.Lp434x2:
+ .quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
+ .quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
+ .quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
+
+# p434 + 1
+.Lp434p1:
+ .quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
+ .quad 0x6CFC5FD681C52056, 0x0002341F27177344
+
+.text
+___
+
+# Computes C0-C2 = A0 * (B0-B1)
+# A0 remains intact; B0 and B1 are overwritten (umulh results), and C0-C2 are read as well as written.
+sub mul64x128 {
+ my ($A0,$B0,$B1,$C0,$C1,$C2,$T0,$T1)=@_;
+ my $body=<<___;
+ mul $T1, $A0, $B0
+ umulh $B0, $A0, $B0
+ adds $C0, $C0, $C2
+ adc $C1, $C1, xzr
+
+ mul $T0, $A0, $B1
+ umulh $B1, $A0, $B1
+ adds $C0, $C0, $T1
+ adcs $C1, $C1, $B0
+ adc $C2, xzr, xzr
+
+ adds $C1, $C1, $T0
+ adc $C2, $C2, $B1
+___
+ return $body;
+}
+
+# Computes C0-C4 = A0 * (B0-B3)
+# Inputs remain intact
+sub mul64x256 {
+ my ($A0,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$T0,$T1,$T2)=@_;
+ my $body=<<___;
+ mul $C0, $A0, $B0 // C0
+ umulh $T0, $A0, $B0
+
+ mul $C1, $A0, $B1
+ umulh $T1, $A0, $B1
+ adds $C1, $C1, $T0 // C1
+ adc $T0, xzr, xzr
+
+ mul $C2, $A0, $B2
+ umulh $T2, $A0, $B2
+ adds $T1, $T0, $T1
+ adcs $C2, $C2, $T1 // C2
+ adc $T0, xzr, xzr
+
+ mul $C3, $A0, $B3
+ umulh $C4, $A0, $B3
+ adds $T2, $T0, $T2
+ adcs $C3, $C3, $T2 // C3
+ adc $C4, $C4, xzr // C4
+___
+ return $body;
+}
+
+# Computes C0-C5 = (A0-A1) * (B0-B3)
+# Inputs remain intact
+sub mul128x256 {
+ my ($A0,$A1,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_;
+ my $body=<<___;
+ mul $C0, $A0, $B0 // C0
+ umulh $C3, $A0, $B0
+
+ mul $C1, $A0, $B1
+ umulh $C2, $A0, $B1
+
+ mul $T0, $A1, $B0
+ umulh $T1, $A1, $B0
+ adds $C1, $C1, $C3
+ adc $C2, $C2, xzr
+
+ mul $T2, $A0, $B2
+ umulh $T3, $A0, $B2
+ adds $C1, $C1, $T0 // C1
+ adcs $C2, $C2, $T1
+ adc $C3, xzr, xzr
+
+ mul $T0, $A1, $B1
+ umulh $T1, $A1, $B1
+ adds $C2, $C2, $T2
+ adcs $C3, $C3, $T3
+ adc $C4, xzr, xzr
+
+ mul $T2, $A0, $B3
+ umulh $T3, $A0, $B3
+ adds $C2, $C2, $T0 // C2
+ adcs $C3, $C3, $T1
+ adc $C4, $C4, xzr
+
+ mul $T0, $A1, $B2
+ umulh $T1, $A1, $B2
+ adds $C3, $C3, $T2
+ adcs $C4, $C4, $T3
+ adc $C5, xzr, xzr
+
+ mul $T2, $A1, $B3
+ umulh $T3, $A1, $B3
+ adds $C3, $C3, $T0 // C3
+ adcs $C4, $C4, $T1
+ adc $C5, $C5, xzr
+ adds $C4, $C4, $T2 // C4
+ adc $C5, $C5, $T3 // C5
+
+___
+ return $body;
+}
+
+# Computes C0-C5 = (A0-A2) * (B0-B2)
+# Inputs remain intact
+sub mul192 {
+ my ($A0,$A1,$A2,$B0,$B1,$B2,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_;
+ my $body=<<___;
+
+ // A0 * B0
+ mul $C0, $A0, $B0 // C0
+ umulh $C3, $A0, $B0
+
+ // A0 * B1
+ mul $C1, $A0, $B1
+ umulh $C2, $A0, $B1
+
+ // A1 * B0
+ mul $T0, $A1, $B0
+ umulh $T1, $A1, $B0
+ adds $C1, $C1, $C3
+ adc $C2, $C2, xzr
+
+ // A0 * B2
+ mul $T2, $A0, $B2
+ umulh $T3, $A0, $B2
+ adds $C1, $C1, $T0 // C1
+ adcs $C2, $C2, $T1
+ adc $C3, xzr, xzr
+
+ // A2 * B0
+ mul $T0, $A2, $B0
+ umulh $C4, $A2, $B0
+ adds $C2, $C2, $T2
+ adcs $C3, $C3, $C4
+ adc $C4, xzr, xzr
+
+ // A1 * B1
+ mul $T2, $A1, $B1
+ umulh $T1, $A1, $B1
+ adds $C2, $C2, $T0
+ adcs $C3, $C3, $T3
+ adc $C4, $C4, xzr
+
+ // A1 * B2
+ mul $T0, $A1, $B2
+ umulh $T3, $A1, $B2
+ adds $C2, $C2, $T2 // C2
+ adcs $C3, $C3, $T1
+ adc $C4, $C4, xzr
+
+ // A2 * B1
+ mul $T2, $A2, $B1
+ umulh $T1, $A2, $B1
+ adds $C3, $C3, $T0
+ adcs $C4, $C4, $T3
+ adc $C5, xzr, xzr
+
+ // A2 * B2
+ mul $T0, $A2, $B2
+ umulh $T3, $A2, $B2
+ adds $C3, $C3, $T2 // C3
+ adcs $C4, $C4, $T1
+ adc $C5, $C5, xzr
+
+ adds $C4, $C4, $T0 // C4
+ adc $C5, $C5, $T3 // C5
+___
+ return $body;
+}
+sub mul256_karatsuba {
+ my ($M,$A0,$A1,$A2,$A3,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$C5,$C6,$C7,$T0,$T1)=@_;
+ # (AH+AL) x (BH+BL), low part
+ my $mul_low=&mul64x128($A1, $C6, $T1, $C3, $C4, $C5, $C7, $A0);
+ # AL x BL
+ my $mul_albl=&mul64x128($A1, $B0, $B1, $C1, $T1, $C7, $C6, $A0);
+ # AH x BH
+ my $mul_ahbh=&mul64x128($A3, $B2, $B3, $A1, $C6, $B0, $B1, $A2);
+ my $body=<<___;
+ // A0-A1 <- AH + AL, T0 <- mask
+ adds $A0, $A0, $A2
+ adcs $A1, $A1, $A3
+ adc $T0, xzr, xzr
+
+ // C6, T1 <- BH + BL, C7 <- mask
+ adds $C6, $B0, $B2
+ adcs $T1, $B1, $B3
+ adc $C7, xzr, xzr
+
+ // C0-C1 <- masked (BH + BL)
+ sub $C2, xzr, $T0
+ sub $C3, xzr, $C7
+ and $C0, $C6, $C2
+ and $C1, $T1, $C2
+
+ // C4-C5 <- masked (AH + AL), T0 <- combined carry
+ and $C4, $A0, $C3
+ and $C5, $A1, $C3
+ mul $C2, $A0, $C6
+ mul $C3, $A0, $T1
+ and $T0, $T0, $C7
+
+ // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
+ adds $C0, $C4, $C0
+ umulh $C4, $A0, $T1
+ adcs $C1, $C5, $C1
+ umulh $C5, $A0, $C6
+ adc $T0, $T0, xzr
+
+ // C2-C5 <- (AH+AL) x (BH+BL), low part
+ $mul_low
+ ldp $A0, $A1, [$M,#0]
+
+ // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
+ adds $C4, $C0, $C4
+ umulh $C7, $A0, $B0
+ umulh $T1, $A0, $B1
+ adcs $C5, $C1, $C5
+ mul $C0, $A0, $B0
+ mul $C1, $A0, $B1
+ adc $T0, $T0, xzr
+
+ // C0-C1, T1, C7 <- AL x BL
+ $mul_albl
+
+ // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
+ mul $A0, $A2, $B2
+ umulh $B0, $A2, $B2
+ subs $C2, $C2, $C0
+ sbcs $C3, $C3, $C1
+ sbcs $C4, $C4, $T1
+ mul $A1, $A2, $B3
+ umulh $C6, $A2, $B3
+ sbcs $C5, $C5, $C7
+ sbc $T0, $T0, xzr
+
+ // A0, A1, C6, B0 <- AH x BH
+ $mul_ahbh
+
+ // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+ subs $C2, $C2, $A0
+ sbcs $C3, $C3, $A1
+ sbcs $C4, $C4, $C6
+ sbcs $C5, $C5, $B0
+ sbc $T0, $T0, xzr
+
+ adds $C2, $C2, $T1
+ adcs $C3, $C3, $C7
+ adcs $C4, $C4, $A0
+ adcs $C5, $C5, $A1
+ adcs $C6, $T0, $C6
+ adc $C7, $B0, xzr
+___
+ return $body;
+}
+
+# 448-bit (7 x 64-bit limbs) integer multiplication using Karatsuba (two levels),
+# Comba (lower level).
+# Operation: c [x2] = a [x0] * b [x1]
+sub mul {
+ # (AH+AL) x (BH+BL), low part
+ my $mul_kc_low=&mul256_karatsuba(
+ "x2", # M0
+ "x3","x4","x5","x6", # A0-A3
+ "x10","x11","x12","x13", # B0-B3
+ "x8","x9","x19","x20","x21","x22","x23","x24", # C0-C7
+ "x25","x26"); # TMP
+ # AL x BL
+ my $mul_albl=&mul256_karatsuba(
+ "x0", # M0f
+ "x3","x4","x5","x6", # A0-A3
+ "x10","x11","x12","x13", # B0-B3
+ "x21","x22","x23","x24","x25","x26","x27","x28",# C0-C7
+ "x8","x9"); # TMP
+ # AH x BH
+ my $mul_ahbh=&mul192(
+ "x3","x4","x5", # A0-A2
+ "x10","x11","x12", # B0-B2
+ "x21","x22","x23","x24","x25","x26", # C0-C5
+ "x8","x9","x27","x28"); # TMP
+
+ my $body=<<___;
+ .global ${PREFIX}_mpmul
+ .align 4
+ ${PREFIX}_mpmul:
+ stp x29, x30, [sp,#-96]!
+ add x29, sp, #0
+ stp x19, x20, [sp,#16]
+ stp x21, x22, [sp,#32]
+ stp x23, x24, [sp,#48]
+ stp x25, x26, [sp,#64]
+ stp x27, x28, [sp,#80]
+
+ ldp x3, x4, [x0]
+ ldp x5, x6, [x0,#16]
+ ldp x7, x8, [x0,#32]
+ ldr x9, [x0,#48]
+ ldp x10, x11, [x1,#0]
+ ldp x12, x13, [x1,#16]
+ ldp x14, x15, [x1,#32]
+ ldr x16, [x1,#48]
+
+ // x3-x7 <- AH + AL, x7 <- carry
+ adds x3, x3, x7
+ adcs x4, x4, x8
+ adcs x5, x5, x9
+ adcs x6, x6, xzr
+ adc x7, xzr, xzr
+
+ // x10-x13 <- BH + BL, x8 <- carry
+ adds x10, x10, x14
+ adcs x11, x11, x15
+ adcs x12, x12, x16
+ adcs x13, x13, xzr
+ adc x8, xzr, xzr
+
+ // x9 <- combined carry
+ and x9, x7, x8
+ // x7-x8 <- mask
+ sub x7, xzr, x7
+ sub x8, xzr, x8
+
+ // x15-x19 <- masked (BH + BL)
+ and x14, x10, x7
+ and x15, x11, x7
+ and x16, x12, x7
+ and x17, x13, x7
+
+ // x20-x23 <- masked (AH + AL)
+ and x20, x3, x8
+ and x21, x4, x8
+ and x22, x5, x8
+ and x23, x6, x8
+
+ // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
+ adds x14, x14, x20
+ adcs x15, x15, x21
+ adcs x16, x16, x22
+ adcs x17, x17, x23
+ adc x7, x9, xzr
+
+ // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
+ stp x3, x4, [x2,#0]
+ $mul_kc_low
+
+ // x15-x19, x7 <- (AH+AL) x (BH+BL), final step
+ adds x14, x14, x21
+ adcs x15, x15, x22
+ adcs x16, x16, x23
+ adcs x17, x17, x24
+ adc x7, x7, xzr
+
+ // Load AL
+ ldp x3, x4, [x0]
+ ldp x5, x6, [x0,#16]
+ // Load BL
+ ldp x10, x11, [x1,#0]
+ ldp x12, x13, [x1,#16]
+
+ // Temporarily store x8 in x2
+ stp x8, x9, [x2,#0]
+ // x21-x28 <- AL x BL
+ $mul_albl
+ // Restore x8
+ ldp x8, x9, [x2,#0]
+
+ // x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
+ subs x8, x8, x21
+ sbcs x9, x9, x22
+ sbcs x19, x19, x23
+ sbcs x20, x20, x24
+ sbcs x14, x14, x25
+ sbcs x15, x15, x26
+ sbcs x16, x16, x27
+ sbcs x17, x17, x28
+ sbc x7, x7, xzr
+
+ // Store ALxBL, low
+ stp x21, x22, [x2]
+ stp x23, x24, [x2,#16]
+
+ // Load AH
+ ldp x3, x4, [x0,#32]
+ ldr x5, [x0,#48]
+ // Load BH
+ ldp x10, x11, [x1,#32]
+ ldr x12, [x1,#48]
+
+ adds x8, x8, x25
+ adcs x9, x9, x26
+ adcs x19, x19, x27
+ adcs x20, x20, x28
+ adc x1, xzr, xzr
+
+ add x0, x0, #32
+ // Temporarily store x8,x9 in x2
+ stp x8,x9, [x2,#32]
+ // x21-x28 <- AH x BH
+ $mul_ahbh
+ // Restore x8,x9
+ ldp x8,x9, [x2,#32]
+
+ neg x1, x1
+
+ // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+ subs x8, x8, x21
+ sbcs x9, x9, x22
+ sbcs x19, x19, x23
+ sbcs x20, x20, x24
+ sbcs x14, x14, x25
+ sbcs x15, x15, x26
+ sbcs x16, x16, xzr
+ sbcs x17, x17, xzr
+ sbc x7, x7, xzr
+
+ // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
+ stp x8, x9, [x2,#32]
+ stp x19, x20, [x2,#48]
+
+ adds x1, x1, #1
+ adcs x14, x14, x21
+ adcs x15, x15, x22
+ adcs x16, x16, x23
+ adcs x17, x17, x24
+ adcs x25, x7, x25
+ adc x26, x26, xzr
+
+ stp x14, x15, [x2,#64]
+ stp x16, x17, [x2,#80]
+ stp x25, x26, [x2,#96]
+
+ ldp x19, x20, [x29,#16]
+ ldp x21, x22, [x29,#32]
+ ldp x23, x24, [x29,#48]
+ ldp x25, x26, [x29,#64]
+ ldp x27, x28, [x29,#80]
+ ldp x29, x30, [sp],#96
+ ret
+___
+ return $body;
+}
+$code.=&mul();
+
+# Montgomery reduction
+# Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
+# Operation: mc [x1] = ma [x0]
+# NOTE: ma=mc is not allowed
+sub rdc {
+ my $mul01=&mul128x256(
+ "x2","x3", # A0-A1
+ "x23","x24","x25","x26", # B0-B3
+ "x4","x5","x6","x7","x8","x9", # C0-C5
+ "x10","x11","x27","x28"); # TMP
+ my $mul23=&mul128x256(
+ "x2","x10", # A0-A1
+ "x23","x24","x25","x26", # B0-B3
+ "x4","x5","x6","x7","x8","x9", # C0-C5
+ "x0","x3","x27","x28"); # TMP
+ my $mul45=&mul128x256(
+ "x11","x12", # A0-A1
+ "x23","x24","x25","x26", # B0-B3
+ "x4","x5","x6","x7","x8","x9", # C0-C5
+ "x10","x3","x27","x28"); # TMP
+ my $mul67=&mul64x256(
+ "x13", # A0
+ "x23","x24","x25","x26", # B0-B3
+ "x4","x5","x6","x7","x8", # C0-C4
+ "x10","x27","x28"); # TMP
+ my $body=<<___;
+ .global ${PREFIX}_fprdc
+ .align 4
+ ${PREFIX}_fprdc:
+ stp x29, x30, [sp, #-96]!
+ add x29, sp, xzr
+ stp x19, x20, [sp,#16]
+ stp x21, x22, [sp,#32]
+ stp x23, x24, [sp,#48]
+ stp x25, x26, [sp,#64]
+ stp x27, x28, [sp,#80]
+
+ ldp x2, x3, [x0,#0] // a[0-1]
+
+ // Load the prime constant
+ adrp x26, :pg_hi21:.Lp434p1
+ add x26, x26, :lo12:.Lp434p1
+ ldp x23, x24, [x26, #0x0]
+ ldp x25, x26, [x26,#0x10]
+
+ // a[0-1] * p434+1
+ $mul01
+
+ ldp x10, x11, [x0, #0x18]
+ ldp x12, x13, [x0, #0x28]
+ ldp x14, x15, [x0, #0x38]
+ ldp x16, x17, [x0, #0x48]
+ ldp x19, x20, [x0, #0x58]
+ ldr x21, [x0, #0x68]
+
+ adds x10, x10, x4
+ adcs x11, x11, x5
+ adcs x12, x12, x6
+ adcs x13, x13, x7
+ adcs x14, x14, x8
+ adcs x15, x15, x9
+ adcs x22, x16, xzr
+ adcs x17, x17, xzr
+ adcs x19, x19, xzr
+ adcs x20, x20, xzr
+ adc x21, x21, xzr
+
+ ldr x2, [x0,#0x10] // a[2]
+ // a[2-3] * p434+1
+ $mul23
+
+ adds x12, x12, x4
+ adcs x13, x13, x5
+ adcs x14, x14, x6
+ adcs x15, x15, x7
+ adcs x16, x22, x8
+ adcs x17, x17, x9
+ adcs x22, x19, xzr
+ adcs x20, x20, xzr
+ adc x21, x21, xzr
+
+ $mul45
+ adds x14, x14, x4
+ adcs x15, x15, x5
+ adcs x16, x16, x6
+ adcs x17, x17, x7
+ adcs x19, x22, x8
+ adcs x20, x20, x9
+ adc x22, x21, xzr
+
+ stp x14, x15, [x1, #0x0] // C0, C1
+
+ $mul67
+ adds x16, x16, x4
+ adcs x17, x17, x5
+ adcs x19, x19, x6
+ adcs x20, x20, x7
+ adc x21, x22, x8
+
+ str x16, [x1, #0x10]
+ stp x17, x19, [x1, #0x18]
+ stp x20, x21, [x1, #0x28]
+
+ ldp x19, x20, [x29,#16]
+ ldp x21, x22, [x29,#32]
+ ldp x23, x24, [x29,#48]
+ ldp x25, x26, [x29,#64]
+ ldp x27, x28, [x29,#80]
+ ldp x29, x30, [sp],#96
+ ret
+___
+}
+$code.=&rdc();
+
+# Field addition
+# Operation: c [x2] = a [x0] + b [x1]
+$code.=<<___;
+ .global ${PREFIX}_fpadd
+ .align 4
+ ${PREFIX}_fpadd:
+ stp x29,x30, [sp,#-16]!
+ add x29, sp, #0
+
+ ldp x3, x4, [x0,#0]
+ ldp x5, x6, [x0,#16]
+ ldp x7, x8, [x0,#32]
+ ldr x9, [x0,#48]
+ ldp x11, x12, [x1,#0]
+ ldp x13, x14, [x1,#16]
+ ldp x15, x16, [x1,#32]
+ ldr x17, [x1,#48]
+
+ // Add a + b
+ adds x3, x3, x11
+ adcs x4, x4, x12
+ adcs x5, x5, x13
+ adcs x6, x6, x14
+ adcs x7, x7, x15
+ adcs x8, x8, x16
+ adc x9, x9, x17
+
+ // Subtract 2xp434
+ adrp x17, :pg_hi21:.Lp434x2
+ add x17, x17, :lo12:.Lp434x2
+ ldp x11, x12, [x17, #0]
+ ldp x13, x14, [x17, #16]
+ ldp x15, x16, [x17, #32]
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x12
+ sbcs x6, x6, x13
+ sbcs x7, x7, x14
+ sbcs x8, x8, x15
+ sbcs x9, x9, x16
+ sbc x0, xzr, xzr // x0 can be reused now
+
+ // Add 2xp434 anded with the mask in x0
+ and x11, x11, x0
+ and x12, x12, x0
+ and x13, x13, x0
+ and x14, x14, x0
+ and x15, x15, x0
+ and x16, x16, x0
+
+ adds x3, x3, x11
+ adcs x4, x4, x12
+ adcs x5, x5, x12
+ adcs x6, x6, x13
+ adcs x7, x7, x14
+ adcs x8, x8, x15
+ adc x9, x9, x16
+
+ stp x3, x4, [x2,#0]
+ stp x5, x6, [x2,#16]
+ stp x7, x8, [x2,#32]
+ str x9, [x2,#48]
+
+ ldp x29, x30, [sp],#16
+ ret
+___
+
+# Field subtraction
+# Operation: c [x2] = a [x0] - b [x1]
+$code.=<<___;
+ .global ${PREFIX}_fpsub
+ .align 4
+ ${PREFIX}_fpsub:
+ stp x29, x30, [sp,#-16]!
+ add x29, sp, #0
+
+ ldp x3, x4, [x0,#0]
+ ldp x5, x6, [x0,#16]
+ ldp x7, x8, [x0,#32]
+ ldr x9, [x0,#48]
+ ldp x11, x12, [x1,#0]
+ ldp x13, x14, [x1,#16]
+ ldp x15, x16, [x1,#32]
+ ldr x17, [x1,#48]
+
+ // Subtract a - b
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ sbcs x7, x7, x15
+ sbcs x8, x8, x16
+ sbcs x9, x9, x17
+ sbc x0, xzr, xzr
+
+ // Add 2xp434 anded with the mask in x0
+ adrp x17, :pg_hi21:.Lp434x2
+ add x17, x17, :lo12:.Lp434x2
+
+ // First half
+ ldp x11, x12, [x17, #0]
+ ldp x13, x14, [x17, #16]
+ ldp x15, x16, [x17, #32]
+
+ // Add 2xp434 anded with the mask in x0
+ and x11, x11, x0
+ and x12, x12, x0
+ and x13, x13, x0
+ and x14, x14, x0
+ and x15, x15, x0
+ and x16, x16, x0
+
+ adds x3, x3, x11
+ adcs x4, x4, x12
+ adcs x5, x5, x12
+ adcs x6, x6, x13
+ adcs x7, x7, x14
+ adcs x8, x8, x15
+ adc x9, x9, x16
+
+ stp x3, x4, [x2,#0]
+ stp x5, x6, [x2,#16]
+ stp x7, x8, [x2,#32]
+ str x9, [x2,#48]
+
+ ldp x29, x30, [sp],#16
+ ret
+___
+
+# 434-bit multiprecision addition
+# Operation: c [x2] = a [x0] + b [x1]
+$code.=<<___;
+ .global ${PREFIX}_mpadd_asm
+ .align 4
+ ${PREFIX}_mpadd_asm:
+ stp x29, x30, [sp,#-16]!
+ add x29, sp, #0
+
+ ldp x3, x4, [x0,#0]
+ ldp x5, x6, [x0,#16]
+ ldp x7, x8, [x0,#32]
+ ldr x9, [x0,#48]
+ ldp x11, x12, [x1,#0]
+ ldp x13, x14, [x1,#16]
+ ldp x15, x16, [x1,#32]
+ ldr x17, [x1,#48]
+
+ adds x3, x3, x11
+ adcs x4, x4, x12
+ adcs x5, x5, x13
+ adcs x6, x6, x14
+ adcs x7, x7, x15
+ adcs x8, x8, x16
+ adc x9, x9, x17
+
+ stp x3, x4, [x2,#0]
+ stp x5, x6, [x2,#16]
+ stp x7, x8, [x2,#32]
+ str x9, [x2,#48]
+
+ ldp x29, x30, [sp],#16
+ ret
+___
+
+# 2x434-bit multiprecision subtraction
+# Operation: c [x2] = a [x0] - b [x1].
+# Returns borrow mask
+$code.=<<___;
+ .global ${PREFIX}_mpsubx2_asm
+ .align 4
+ ${PREFIX}_mpsubx2_asm:
+ stp x29, x30, [sp,#-16]!
+ add x29, sp, #0
+
+ ldp x3, x4, [x0,#0]
+ ldp x5, x6, [x0,#16]
+ ldp x11, x12, [x1,#0]
+ ldp x13, x14, [x1,#16]
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ ldp x7, x8, [x0,#32]
+ ldp x9, x10, [x0,#48]
+ ldp x11, x12, [x1,#32]
+ ldp x13, x14, [x1,#48]
+ sbcs x7, x7, x11
+ sbcs x8, x8, x12
+ sbcs x9, x9, x13
+ sbcs x10, x10, x14
+
+ stp x3, x4, [x2,#0]
+ stp x5, x6, [x2,#16]
+ stp x7, x8, [x2,#32]
+ stp x9, x10, [x2,#48]
+
+ ldp x3, x4, [x0,#64]
+ ldp x5, x6, [x0,#80]
+ ldp x11, x12, [x1,#64]
+ ldp x13, x14, [x1,#80]
+ sbcs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ ldp x7, x8, [x0,#96]
+ ldp x11, x12, [x1,#96]
+ sbcs x7, x7, x11
+ sbcs x8, x8, x12
+ sbc x0, xzr, xzr
+
+ stp x3, x4, [x2,#64]
+ stp x5, x6, [x2,#80]
+ stp x7, x8, [x2,#96]
+
+ ldp x29, x30, [sp],#16
+ ret
+___
+
+
+# Double 2x434-bit multiprecision subtraction
+# Operation: c [x2] = c [x2] - a [x0] - b [x1]
+$code.=<<___;
+ .global ${PREFIX}_mpdblsubx2_asm
+ .align 4
+ ${PREFIX}_mpdblsubx2_asm:
+ stp x29, x30, [sp, #-16]!
+ add x29, sp, #0
+
+ ldp x3, x4, [x2, #0]
+ ldp x5, x6, [x2,#16]
+ ldp x7, x8, [x2,#32]
+
+ ldp x11, x12, [x0, #0]
+ ldp x13, x14, [x0,#16]
+ ldp x15, x16, [x0,#32]
+
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ sbcs x7, x7, x15
+ sbcs x8, x8, x16
+
+ // x9 stores carry
+ adc x9, xzr, xzr
+
+ ldp x11, x12, [x1, #0]
+ ldp x13, x14, [x1,#16]
+ ldp x15, x16, [x1,#32]
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ sbcs x7, x7, x15
+ sbcs x8, x8, x16
+ adc x9, x9, xzr
+
+ stp x3, x4, [x2, #0]
+ stp x5, x6, [x2,#16]
+ stp x7, x8, [x2,#32]
+
+ ldp x3, x4, [x2,#48]
+ ldp x5, x6, [x2,#64]
+ ldp x7, x8, [x2,#80]
+
+ ldp x11, x12, [x0,#48]
+ ldp x13, x14, [x0,#64]
+ ldp x15, x16, [x0,#80]
+
+ // x9 = 2 - x9
+ neg x9, x9
+ add x9, x9, #2
+
+ subs x3, x3, x9
+ sbcs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ sbcs x7, x7, x15
+ sbcs x8, x8, x16
+ adc x9, xzr, xzr
+
+ ldp x11, x12, [x1,#48]
+ ldp x13, x14, [x1,#64]
+ ldp x15, x16, [x1,#80]
+ subs x3, x3, x11
+ sbcs x4, x4, x12
+ sbcs x5, x5, x13
+ sbcs x6, x6, x14
+ sbcs x7, x7, x15
+ sbcs x8, x8, x16
+ adc x9, x9, xzr
+
+ stp x3, x4, [x2,#48]
+ stp x5, x6, [x2,#64]
+ stp x7, x8, [x2,#80]
+
+ ldp x3, x4, [x2,#96]
+ ldp x11, x12, [x0,#96]
+ ldp x13, x14, [x1,#96]
+
+ // x9 = 2 - x9
+ neg x9, x9
+ add x9, x9, #2
+
+ subs x3, x3, x9
+ sbcs x3, x3, x11
+ sbcs x4, x4, x12
+ subs x3, x3, x13
+ sbc x4, x4, x14
+ stp x3, x4, [x2,#96]
+
+ ldp x29, x30, [sp],#16
+ ret
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/ge;
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/src/third_party/sike/asm/fp-x86_64.pl b/src/third_party/sike/asm/fp-x86_64.pl
new file mode 100755
index 00000000..cffde1a8
--- /dev/null
+++ b/src/third_party/sike/asm/fp-x86_64.pl
@@ -0,0 +1,1626 @@
+#! /usr/bin/env perl
+#
+# April 2019
+#
+# Abstract: field arithmetic in x64 assembly for SIDH/p434
+
+# Usage: fp-x86_64.pl [flavour] output
+# A single argument that contains a dot is taken as the output file name.
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+# Locate the perlasm translator next to this script or in crypto/perlasm.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+# Everything printed to STDOUT is piped through the translator. Fail loudly
+# if the pipe cannot be started instead of silently producing no output.
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
+ or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$PREFIX="sike";
+$bmi2_adx = 1;
+
+# Read-only constants shared by the routines below.
+# .Lp434x2 holds six quadwords; the field addition/subtraction code reads
+# the quadword at offset 8 for two consecutive limbs, which is how the six
+# entries cover a seven-limb value.
+# .Lp434p1 holds four quadwords; redc_common adds each product with it into
+# the operand three limbs (0x18 bytes) above the limbs that were multiplied.
+$code.=<<___;
+.text
+
+# p434 x 2
+.Lp434x2:
+.quad 0xFFFFFFFFFFFFFFFE
+.quad 0xFFFFFFFFFFFFFFFF
+.quad 0xFB82ECF5C5FFFFFF
+.quad 0xF78CB8F062B15D47
+.quad 0xD9F8BFAD038A40AC
+.quad 0x0004683E4E2EE688
+
+# p434 + 1
+.Lp434p1:
+.quad 0xFDC1767AE3000000
+.quad 0x7BC65C783158AEA3
+.quad 0x6CFC5FD681C52056
+.quad 0x0002341F27177344
+
+.extern OPENSSL_ia32cap_P
+.hidden OPENSSL_ia32cap_P
+___
+
+# Emits a check that jumps to the alternative implementation whose label is
+# passed as the only argument when the CPU supports the ADOX/ADCX and MULX
+# instructions. The check loads the word at byte offset 8 of
+# OPENSSL_ia32cap_P and requires both bits of the mask 0x80100 to be set.
+# The emitted code clobbers %rcx and the flags.
+# Returns the assembly text.
+sub alt_impl {
+ my $jmp_func = shift;
+
+ my $body=<<___;
+ lea OPENSSL_ia32cap_P(%rip), %rcx
+ mov 8(%rcx), %rcx
+ and \$0x80100, %ecx
+ cmp \$0x80100, %ecx
+ je $jmp_func
+
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of two 192-bit numbers using the
+# MULX/ADOX/ADCX instructions. The 384-bit product is stored in the six
+# quadwords starting at $idxDST($DST).
+# Arguments: byte offset and base register of the first operand, of the
+# second operand and of the destination, followed by seven scratch
+# registers ($T0-$T6). The emitted code also clobbers %rax, %rdx and flags.
+# Returns the assembly text.
+sub mul192 {
+ my ($idxM0,$M0,$idxM1,$M1,$idxDST,$DST,$T0,$T1,$T2,$T3,$T4,$T5,$T6)=@_;
+ my ($ML0,$ML8,$ML16)=map("$idxM0+$_($M0)",(0,8,16));
+ my ($MR0,$MR8,$MR16)=map("$idxM1+$_($M1)",(0,8,16));
+ my ($D0,$D1,$D2,$D3,$D4,$D5)=map("$idxDST+$_($DST)",(0,8,16,24,32,40));
+
+ my $body=<<___;
+ mov $ML0, %rdx
+ mulx $MR0, $T1, $T0 # T0:T1 = A0*B0
+ mov $T1, $D0 # DST0
+ mulx $MR8, $T2, $T1 # T1:T2 = A0*B1
+ xor %rax, %rax
+ adox $T2, $T0
+ mulx $MR16,$T3, $T2 # T2:T3 = A0*B2
+ adox $T3, $T1
+
+ mov $ML8, %rdx
+ mulx $MR0, $T4, $T3 # T3:T4 = A1*B0
+ adox %rax, $T2
+ xor %rax, %rax
+
+ mulx $MR8, $T6, $T5 # T5:T6 = A1*B1
+ adox $T0, $T4
+ mov $T4, $D1 # DST1
+ adcx $T6, $T3
+
+ mulx $MR16,$T0, $T6 # T6:T0 = A1*B2
+ adox $T1, $T3
+ adcx $T0, $T5
+ adcx %rax, $T6
+ adox $T2, $T5
+
+ mov $ML16,%rdx
+ mulx $MR0, $T0, $T1 # T1:T0 = A2*B0
+ adox %rax, $T6
+ xor %rax, %rax
+
+ mulx $MR8, $T2, $T4 # T4:T2 = A2*B1
+ adox $T3, $T0
+ mov $T0, $D2 # DST2
+ adcx $T5, $T1
+
+ mulx $MR16,$T3, $T0 # T0:T3 = A2*B2
+ adcx $T6, $T4
+ adcx %rax, $T0
+ adox $T2, $T1
+ adox $T4, $T3
+ adox %rax, $T0
+ mov $T1, $D3 # DST3
+ mov $T3, $D4 # DST4
+ mov $T0, $D5 # DST5
+
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of two 256-bit numbers using the
+# MULX/ADOX/ADCX instructions. The 512-bit product is stored in the eight
+# quadwords starting at $idxDST($DST).
+# Arguments: byte offset and base register of the first operand, of the
+# second operand and of the destination, followed by ten scratch registers
+# ($T0-$T9). The emitted code also clobbers %rax, %rdx and the flags.
+# Returns the assembly text.
+sub mul256 {
+ my ($idxM0,$M0,$idxM1,$M1,$idxDST,$DST,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;
+ my ($ML0,$ML8,$ML16,$ML24)=map("$idxM0+$_($M0)",(0,8,16,24));
+ my ($MR0,$MR8,$MR16,$MR24)=map("$idxM1+$_($M1)",(0,8,16,24));
+ my ($D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7)=map("$idxDST+$_($DST)",(0,8,16,24,32,40,48,56));
+
+ my $body=<<___;
+ mov $ML0, %rdx
+ mulx $MR0, $T1, $T0 # T0:T1 = A0*B0
+ mov $T1, $D0 # DST0_final
+ mulx $MR8, $T2, $T1 # T1:T2 = A0*B1
+ xor %rax, %rax
+ adox $T2, $T0
+ mulx $MR16,$T3, $T2 # T2:T3 = A0*B2
+ adox $T3, $T1
+ mulx $MR24,$T4, $T3 # T3:T4 = A0*B3
+ adox $T4, $T2
+
+ mov $ML8, %rdx
+ mulx $MR0, $T4, $T5 # T5:T4 = A1*B0
+ adox %rax, $T3
+ xor %rax, %rax
+ mulx $MR8, $T7, $T6 # T6:T7 = A1*B1
+ adox $T0, $T4
+ mov $T4, $D1 # DST1_final
+ adcx $T7, $T5
+ mulx $MR16,$T8, $T7 # T7:T8 = A1*B2
+ adcx $T8, $T6
+ adox $T1, $T5
+ mulx $MR24,$T9, $T8 # T8:T9 = A1*B3
+ adcx $T9, $T7
+ adcx %rax, $T8
+ adox $T2, $T6
+
+ mov $ML16,%rdx
+ mulx $MR0, $T0, $T1 # T1:T0 = A2*B0
+ adox $T3, $T7
+ adox %rax, $T8
+ xor %rax, %rax
+ mulx $MR8, $T3, $T2 # T2:T3 = A2*B1
+ adox $T5, $T0
+ mov $T0, $D2 # DST2_final
+ adcx $T3, $T1
+ mulx $MR16,$T4, $T3 # T3:T4 = A2*B2
+ adcx $T4, $T2
+ adox $T6, $T1
+ mulx $MR24,$T9, $T4 # T4:T9 = A2*B3
+ adcx $T9, $T3
+ adcx %rax, $T4
+
+ adox $T7, $T2
+ adox $T8, $T3
+ adox %rax, $T4
+
+ mov $ML24,%rdx
+ mulx $MR0, $T0, $T5 # T5:T0 = A3*B0
+ xor %rax, %rax
+ mulx $MR8, $T7, $T6 # T6:T7 = A3*B1
+ adcx $T7, $T5
+ adox $T0, $T1
+ mulx $MR16, $T8, $T7 # T7:T8 = A3*B2
+ adcx $T8, $T6
+ adox $T5, $T2
+ mulx $MR24, $T9, $T8 # T8:T9 = A3*B3
+ adcx $T9, $T7
+ adcx %rax, $T8
+ adox $T6, $T3
+ adox $T7, $T4
+ adox %rax, $T8
+ mov $T1, $D3 # DST3_final
+ mov $T2, $D4 # DST4_final
+ mov $T3, $D5 # DST5_final
+ mov $T4, $D6 # DST6_final
+ mov $T8, $D7 # DST7_final
+
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of a 64-bit number with a 256-bit
+# number using the MUL instruction.
+# Arguments: byte offset and base register of the 64-bit operand, the
+# memory operand of the 256-bit number, and six registers. The 320-bit
+# product is left in $T0-$T4 (low limb first); $T5 holds the 64-bit
+# operand. The emitted code also clobbers %rax, %rdx and the flags.
+# Returns the assembly text.
+sub mul64x256 {
+ my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5)=@_;
+ my $body=<<___;
+ mov $idxM0($M0), $T5
+
+ xor $T2, $T2
+ mov 0+$M1, %rax
+ mul $T5
+ mov %rax, $T0 # C0
+ mov %rdx, $T1
+
+ xor $T3, $T3
+ mov 8+$M1, %rax
+ mul $T5
+ add %rax, $T1 # C1
+ adc %rdx, $T2
+
+ xor $T4, $T4
+ mov 16+$M1, %rax
+ mul $T5
+ add %rax, $T2 # C2
+ adc %rdx, $T3
+
+ mov 24+$M1, %rax
+ mul $T5
+ add %rax, $T3 # C3
+ adc %rdx, $T4 # C4
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of a 64-bit number with a 256-bit
+# number. Uses MULX and ADOX instructions.
+# Arguments: byte offset and base register of the 64-bit operand, the
+# memory operand of the 256-bit number, and six registers. The 320-bit
+# product is left in $T0-$T4 (low limb first); $T5 is scratch.
+# The emitted code also clobbers %rax, %rdx and the flags.
+# Returns the assembly text.
+sub mulx64x256 {
+ my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5)=@_;
+ my $body=<<___;
+ xor %rax, %rax
+ mov $idxM0($M0), %rdx
+ mulx 0+$M1, $T0, $T1 # T0 <- C0
+ mulx 8+$M1, $T4, $T2
+ mulx 16+$M1, $T5, $T3
+
+ adox $T4, $T1 # T1 <- C1
+ adox $T5, $T2 # T2 <- C2
+
+ mulx 24+$M1, $T5, $T4
+ adox $T5, $T3 # T3 <- C3
+ adox %rax, $T4 # T4 <- C4
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of a 128-bit number with a 256-bit
+# number using the MUL instruction. Destroys RAX and RDX.
+# Arguments: byte offset and base register of the 128-bit operand, the
+# memory operand of the 256-bit number, six result registers $C0-$C5 that
+# receive the 384-bit product (low limb first), and two registers $T0/$T1
+# that hold the two limbs of the 128-bit operand.
+# Returns the assembly text.
+sub mul128x256 {
+ my ($idxMA,$MA,$MB,$C0,$C1,$C2,$C3,$C4,$C5,$T0,$T1)=@_;
+ my ($MA0,$MA8)=map("$idxMA+$_($MA)", (0,8));
+ my $body=<<___;
+ # A0 x B0
+ mov $MA0, $T0
+ mov 0+$MB, %rax
+ mul $T0
+ xor $C2, $C2
+ mov %rax, $C0 # c0
+ mov %rdx, $C1
+
+ # A0 x B1
+ mov 8+$MB, %rax
+ mul $T0
+ xor $C3, $C3
+ add %rax, $C1
+ adc %rdx, $C2
+
+ # A1 x B0
+ mov $MA8, $T1
+ mov 0+$MB, %rax
+ mul $T1
+ add %rax, $C1
+ adc %rdx, $C2
+ adc \$0x0, $C3
+
+ # A0 x B2
+ xor $C4, $C4
+ mov 16+$MB, %rax
+ mul $T0
+ add %rax, $C2
+ adc %rdx, $C3
+ adc \$0x0, $C4
+
+ # A1 x B1
+ mov 8+$MB, %rax
+ mul $T1
+ add %rax, $C2 # c2
+ adc %rdx, $C3
+ adc \$0x0, $C4
+
+ # A0 x B3
+ mov 24+$MB, %rax
+ mul $T0
+ xor $C5, $C5
+ add %rax, $C3
+ adc %rdx, $C4
+ adc \$0x0, $C5
+
+ # A1 x B2
+ mov 16+$MB, %rax
+ mul $T1
+ add %rax, $C3 # c3
+ adc %rdx, $C4
+ adc \$0x0, $C5
+
+ # A1 x B3
+ mov 24+$MB, %rax
+ mul $T1
+ add %rax, $C4
+ adc %rdx, $C5
+
+___
+ return $body;
+}
+
+# Performs schoolbook multiplication of a 128-bit number with a 256-bit
+# number. Uses MULX, ADOX, ADCX instructions.
+# Arguments: byte offset and base register of the 128-bit operand, the
+# memory operand of the 256-bit number, and seven registers. The 384-bit
+# product is left in $T0-$T5 (low limb first); $T6 is scratch.
+# The emitted code also clobbers %rax, %rdx and the flags.
+# Returns the assembly text.
+sub mulx128x256 {
+ my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5,$T6)=@_;
+ my ($MUL0,$MUL8)=map("$idxM0+$_($M0)", (0,8));
+ my $body=<<___;
+ xor %rax, %rax
+ mov $MUL0, %rdx
+ mulx 0+$M1, $T0, $T1 # T0 <- C0
+ mulx 8+$M1, $T4, $T2
+ mulx 16+$M1, $T5, $T3
+
+ adox $T4, $T1 # T1: interm1
+ adox $T5, $T2 # T2: interm2
+
+ mulx 24+$M1, $T5, $T4
+ adox $T5, $T3 # T3: interm3
+ adox %rax, $T4 # T4: interm4
+
+ xor %rax, %rax
+ mov $MUL8, %rdx
+ mulx 0+$M1, $T5, $T6
+ adcx $T5, $T1 # T1 <- C1
+ adcx $T6, $T2
+
+ mulx 8+$M1, $T6, $T5
+ adcx $T5, $T3
+ adox $T6, $T2 # T2 <- C2
+
+ mulx 16+$M1, $T6, $T5
+ adcx $T5, $T4
+ adox $T6, $T3 # T3 <- C3
+
+ mulx 24+$M1, $T6, $T5
+ adcx %rax, $T5
+ adox $T6, $T4 # T4 <- C4
+ adox %rax, $T5 # T5 <- C5
+___
+ return $body;
+}
+
+# Field addition: z = x + y, followed by one conditional correction by
+# the constant .Lp434x2.
+# Operation: c [rdx] = a [rdi] + b [rsi]
+#
+# All operands are seven 64-bit limbs. The sum is formed first and
+# .Lp434x2 is subtracted from it; if that subtraction borrows, rax becomes
+# an all-ones mask and .Lp434x2 is added back under that mask, so no branch
+# depends on the data. Half-way through the add-back the carry is parked in
+# cl (setc) and restored with bt, because the "and" instructions that mask
+# the upper limbs of the constant clear the carry flag.
+# r12-r14 are saved and restored; rdi, rsi and rcx are overwritten.
+$code.=<<___;
+.globl ${PREFIX}_fpadd
+.type ${PREFIX}_fpadd,\@function,3
+${PREFIX}_fpadd:
+.cfi_startproc
+ push %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset r12, -16
+ push %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset r13, -24
+ push %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset r14, -32
+
+ xor %rax, %rax
+
+ mov 0x0(%rdi), %r8
+ add 0x0(%rsi), %r8
+ mov 0x8(%rdi), %r9
+ adc 0x8(%rsi), %r9
+ mov 0x10(%rdi), %r10
+ adc 0x10(%rsi), %r10
+ mov 0x18(%rdi), %r11
+ adc 0x18(%rsi), %r11
+ mov 0x20(%rdi), %r12
+ adc 0x20(%rsi), %r12
+ mov 0x28(%rdi), %r13
+ adc 0x28(%rsi), %r13
+ mov 0x30(%rdi), %r14
+ adc 0x30(%rsi), %r14
+
+ mov .Lp434x2(%rip), %rcx
+ sub %rcx, %r8
+ mov 0x8+.Lp434x2(%rip), %rcx
+ sbb %rcx, %r9
+ sbb %rcx, %r10
+ mov 0x10+.Lp434x2(%rip), %rcx
+ sbb %rcx, %r11
+ mov 0x18+.Lp434x2(%rip), %rcx
+ sbb %rcx, %r12
+ mov 0x20+.Lp434x2(%rip), %rcx
+ sbb %rcx, %r13
+ mov 0x28+.Lp434x2(%rip), %rcx
+ sbb %rcx, %r14
+
+ sbb \$0, %rax
+
+ mov .Lp434x2(%rip), %rdi
+ and %rax, %rdi
+ mov 0x8+.Lp434x2(%rip), %rsi
+ and %rax, %rsi
+ mov 0x10+.Lp434x2(%rip), %rcx
+ and %rax, %rcx
+
+ add %rdi, %r8
+ mov %r8, 0x0(%rdx)
+ adc %rsi, %r9
+ mov %r9, 0x8(%rdx)
+ adc %rsi, %r10
+ mov %r10, 0x10(%rdx)
+ adc %rcx, %r11
+ mov %r11, 0x18(%rdx)
+
+ setc %cl
+ mov 0x18+.Lp434x2(%rip), %r8
+ and %rax, %r8
+ mov 0x20+.Lp434x2(%rip), %r9
+ and %rax, %r9
+ mov 0x28+.Lp434x2(%rip), %r10
+ and %rax, %r10
+ bt \$0, %rcx
+
+ adc %r8, %r12
+ mov %r12, 0x20(%rdx)
+ adc %r9, %r13
+ mov %r13, 0x28(%rdx)
+ adc %r10, %r14
+ mov %r14, 0x30(%rdx)
+
+ pop %r14
+.cfi_adjust_cfa_offset -8
+ pop %r13
+.cfi_adjust_cfa_offset -8
+ pop %r12
+.cfi_adjust_cfa_offset -8
+ ret
+.cfi_endproc
+___
+
+# Emits code that loads 16-byte block number $idx of [rdi] into XMM0 and of
+# [rsi] into XMM1 and conditionally swaps them depending on XMM3:
+# XMM2 = (XMM0 ^ XMM1) & XMM3 is XORed into both before they are stored back.
+# Returns the assembly text.
+sub cswap_block16 {
+ my $idx = shift;
+ $idx *= 16;
+ ("
+ movdqu $idx(%rdi), %xmm0
+ movdqu $idx(%rsi), %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm2
+ pand %xmm3, %xmm2
+ pxor %xmm2, %xmm0
+ pxor %xmm2, %xmm1
+ movdqu %xmm0, $idx(%rdi)
+ movdqu %xmm1, $idx(%rsi)
+ ");
+}
+
+# Conditionally swaps bits in x and y in constant time.
+# mask indicates bits to be swapped (set bits are swapped)
+# Operation: [rdi] <-> [rsi] for every bit set in the mask in rdx, over
+# 14 blocks of 16 bytes (224 bytes) of each operand.
+# Returns the assembly text of the whole function.
+sub sike_cswap {
+ # Blocks 0..6 swap P[0] with Q[0], blocks 7..13 swap P[1] with Q[1].
+ my $blocks = join("", map { cswap_block16($_) } (0..13));
+
+ my $body =<<___;
+.globl ${PREFIX}_cswap_asm
+.type ${PREFIX}_cswap_asm,\@function,3
+${PREFIX}_cswap_asm:
+ # Fill XMM3. After this step the low half of XMM3 holds
+ # RDX and the high half is zero.
+ mov %rdx, %xmm3
+
+ # Copy the low quadword into the high quadword, so that
+ # XMM3=RDX|RDX. As RDX has either all bits set or none,
+ # XMM3 ends up with either all bits set or none of them.
+ # 68 = 01000100b
+ pshufd \$68, %xmm3, %xmm3
+ $blocks
+ ret
+___
+ ($body)
+}
+$code.=&sike_cswap();
+
+
+# Field subtraction
+# Operation: c [rdx] = a [rdi] - b [rsi]
+#
+# All operands are seven 64-bit limbs. If the subtraction borrows, rax
+# becomes an all-ones mask and .Lp434x2 is added back under that mask, so no
+# branch depends on the data. Half-way through the add-back the carry is
+# parked in cl (setc) and restored with bt, because the "and" instructions
+# that mask the upper limbs of the constant clear the carry flag.
+# r12-r14 are saved and restored; rdi, rsi and rcx are overwritten.
+$code.=<<___;
+.globl ${PREFIX}_fpsub
+.type ${PREFIX}_fpsub,\@function,3
+${PREFIX}_fpsub:
+.cfi_startproc
+ push %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset r12, -16
+ push %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset r13, -24
+ push %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset r14, -32
+
+ xor %rax, %rax
+
+ mov 0x0(%rdi), %r8
+ sub 0x0(%rsi), %r8
+ mov 0x8(%rdi), %r9
+ sbb 0x8(%rsi), %r9
+ mov 0x10(%rdi), %r10
+ sbb 0x10(%rsi), %r10
+ mov 0x18(%rdi), %r11
+ sbb 0x18(%rsi), %r11
+ mov 0x20(%rdi), %r12
+ sbb 0x20(%rsi), %r12
+ mov 0x28(%rdi), %r13
+ sbb 0x28(%rsi), %r13
+ mov 0x30(%rdi), %r14
+ sbb 0x30(%rsi), %r14
+
+ sbb \$0x0, %rax
+
+ mov .Lp434x2(%rip), %rdi
+ and %rax, %rdi
+ mov 0x08+.Lp434x2(%rip), %rsi
+ and %rax, %rsi
+ mov 0x10+.Lp434x2(%rip), %rcx
+ and %rax, %rcx
+
+ add %rdi, %r8
+ mov %r8, 0x0(%rdx)
+ adc %rsi, %r9
+ mov %r9, 0x8(%rdx)
+ adc %rsi, %r10
+ mov %r10, 0x10(%rdx)
+ adc %rcx, %r11
+ mov %r11, 0x18(%rdx)
+
+ setc %cl
+ mov 0x18+.Lp434x2(%rip), %r8
+ and %rax, %r8
+ mov 0x20+.Lp434x2(%rip), %r9
+ and %rax, %r9
+ mov 0x28+.Lp434x2(%rip), %r10
+ and %rax, %r10
+ bt \$0x0, %rcx
+
+ adc %r8, %r12
+ adc %r9, %r13
+ adc %r10, %r14
+ mov %r12, 0x20(%rdx)
+ mov %r13, 0x28(%rdx)
+ mov %r14, 0x30(%rdx)
+
+ pop %r14
+.cfi_adjust_cfa_offset -8
+ pop %r13
+.cfi_adjust_cfa_offset -8
+ pop %r12
+.cfi_adjust_cfa_offset -8
+ ret
+.cfi_endproc
+___
+
+# 434-bit multiprecision addition
+# Operation: c [rdx] = a [rdi] + b [rsi]
+#
+# Adds seven 64-bit limbs with carry propagation and no reduction; the
+# carry out of the top limb is not returned. Only r8-r11 and rcx are used
+# as temporaries, so nothing is saved on the stack.
+$code.=<<___;
+.globl ${PREFIX}_mpadd_asm
+.type ${PREFIX}_mpadd_asm,\@function,3
+${PREFIX}_mpadd_asm:
+.cfi_startproc
+ mov 0x0(%rdi), %r8
+ mov 0x8(%rdi), %r9
+ mov 0x10(%rdi), %r10
+ mov 0x18(%rdi), %r11
+ mov 0x20(%rdi), %rcx
+ add 0x0(%rsi), %r8
+ adc 0x8(%rsi), %r9
+ adc 0x10(%rsi), %r10
+ adc 0x18(%rsi), %r11
+ adc 0x20(%rsi), %rcx
+ mov %r8, 0x0(%rdx)
+ mov %r9, 0x8(%rdx)
+ mov %r10, 0x10(%rdx)
+ mov %r11, 0x18(%rdx)
+ mov %rcx, 0x20(%rdx)
+
+ mov 0x28(%rdi), %r8
+ mov 0x30(%rdi), %r9
+ adc 0x28(%rsi), %r8
+ adc 0x30(%rsi), %r9
+ mov %r8, 0x28(%rdx)
+ mov %r9, 0x30(%rdx)
+ ret
+.cfi_endproc
+___
+
+# 2x434-bit multiprecision subtraction
+# Operation: c [rdx] = a [rdi] - b [rsi].
+# Returns borrow mask
+#
+# Subtracts fourteen 64-bit limbs (0x00-0x68) in one borrow chain; the
+# "mov" loads and stores in between do not touch the carry flag. rax is
+# zeroed up front and receives the final borrow through "sbb 0", so it is
+# returned as 0 when there was no borrow and as all ones otherwise.
+$code.=<<___;
+.globl ${PREFIX}_mpsubx2_asm
+.type ${PREFIX}_mpsubx2_asm,\@function,3
+${PREFIX}_mpsubx2_asm:
+.cfi_startproc
+ xor %rax, %rax
+
+ mov 0x0(%rdi), %r8
+ mov 0x8(%rdi), %r9
+ mov 0x10(%rdi), %r10
+ mov 0x18(%rdi), %r11
+ mov 0x20(%rdi), %rcx
+ sub 0x0(%rsi), %r8
+ sbb 0x8(%rsi), %r9
+ sbb 0x10(%rsi), %r10
+ sbb 0x18(%rsi), %r11
+ sbb 0x20(%rsi), %rcx
+ mov %r8, 0x0(%rdx)
+ mov %r9, 0x8(%rdx)
+ mov %r10, 0x10(%rdx)
+ mov %r11, 0x18(%rdx)
+ mov %rcx, 0x20(%rdx)
+
+ mov 0x28(%rdi), %r8
+ mov 0x30(%rdi), %r9
+ mov 0x38(%rdi), %r10
+ mov 0x40(%rdi), %r11
+ mov 0x48(%rdi), %rcx
+ sbb 0x28(%rsi), %r8
+ sbb 0x30(%rsi), %r9
+ sbb 0x38(%rsi), %r10
+ sbb 0x40(%rsi), %r11
+ sbb 0x48(%rsi), %rcx
+ mov %r8, 0x28(%rdx)
+ mov %r9, 0x30(%rdx)
+ mov %r10, 0x38(%rdx)
+ mov %r11, 0x40(%rdx)
+ mov %rcx, 0x48(%rdx)
+
+ mov 0x50(%rdi), %r8
+ mov 0x58(%rdi), %r9
+ mov 0x60(%rdi), %r10
+ mov 0x68(%rdi), %r11
+ sbb 0x50(%rsi), %r8
+ sbb 0x58(%rsi), %r9
+ sbb 0x60(%rsi), %r10
+ sbb 0x68(%rsi), %r11
+ sbb \$0x0, %rax
+ mov %r8, 0x50(%rdx)
+ mov %r9, 0x58(%rdx)
+ mov %r10, 0x60(%rdx)
+ mov %r11, 0x68(%rdx)
+ ret
+.cfi_endproc
+___
+
+
+# Double 2x434-bit multiprecision subtraction
+# Operation: c [rdx] = c [rdx] - a [rdi] - b [rsi]
+#
+# Works on fourteen 64-bit limbs in two halves of seven. In the low half a
+# and then b are subtracted from c in two separate borrow chains; rax counts
+# the borrows out of those chains (0, 1 or 2) and is subtracted from the
+# lowest limb of the high half before a:high and b:high are subtracted.
+# Borrows out of the high half are discarded. r12 and r13 are saved and
+# restored.
+#
+# NOTE(review): the high half starts with "sub rax, r8" followed by
+# "sbb 0x38(rdi), r8", so a borrow out of the rax subtraction is applied to
+# r8 again instead of being propagated to r9. That looks inexact when the
+# limb is smaller than the borrow count -- confirm that callers cannot hit
+# it.
+$code.=<<___;
+.globl ${PREFIX}_mpdblsubx2_asm
+.type ${PREFIX}_mpdblsubx2_asm,\@function,3
+${PREFIX}_mpdblsubx2_asm:
+.cfi_startproc
+ push %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset r12, -16
+ push %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset r13, -24
+
+ xor %rax, %rax
+
+ # ci:low = c:low - a:low
+ mov 0x0(%rdx), %r8
+ mov 0x8(%rdx), %r9
+ mov 0x10(%rdx), %r10
+ mov 0x18(%rdx), %r11
+ mov 0x20(%rdx), %r12
+ mov 0x28(%rdx), %r13
+ mov 0x30(%rdx), %rcx
+ sub 0x0(%rdi), %r8
+ sbb 0x8(%rdi), %r9
+ sbb 0x10(%rdi), %r10
+ sbb 0x18(%rdi), %r11
+ sbb 0x20(%rdi), %r12
+ sbb 0x28(%rdi), %r13
+ sbb 0x30(%rdi), %rcx
+ adc \$0x0, %rax
+
+ # c:low = ci:low - b:low
+ sub 0x0(%rsi), %r8
+ sbb 0x8(%rsi), %r9
+ sbb 0x10(%rsi), %r10
+ sbb 0x18(%rsi), %r11
+ sbb 0x20(%rsi), %r12
+ sbb 0x28(%rsi), %r13
+ sbb 0x30(%rsi), %rcx
+ adc \$0x0, %rax
+
+ # store c:low
+ mov %r8, 0x0(%rdx)
+ mov %r9, 0x8(%rdx)
+ mov %r10, 0x10(%rdx)
+ mov %r11, 0x18(%rdx)
+ mov %r12, 0x20(%rdx)
+ mov %r13, 0x28(%rdx)
+ mov %rcx, 0x30(%rdx)
+
+ # ci:high = c:high - a:high
+ mov 0x38(%rdx), %r8
+ mov 0x40(%rdx), %r9
+ mov 0x48(%rdx), %r10
+ mov 0x50(%rdx), %r11
+ mov 0x58(%rdx), %r12
+ mov 0x60(%rdx), %r13
+ mov 0x68(%rdx), %rcx
+
+ sub %rax, %r8
+ sbb 0x38(%rdi), %r8
+ sbb 0x40(%rdi), %r9
+ sbb 0x48(%rdi), %r10
+ sbb 0x50(%rdi), %r11
+ sbb 0x58(%rdi), %r12
+ sbb 0x60(%rdi), %r13
+ sbb 0x68(%rdi), %rcx
+
+ # c:high = ci:high - b:high
+ sub 0x38(%rsi), %r8
+ sbb 0x40(%rsi), %r9
+ sbb 0x48(%rsi), %r10
+ sbb 0x50(%rsi), %r11
+ sbb 0x58(%rsi), %r12
+ sbb 0x60(%rsi), %r13
+ sbb 0x68(%rsi), %rcx
+
+ # store c:high
+ mov %r8, 0x38(%rdx)
+ mov %r9, 0x40(%rdx)
+ mov %r10, 0x48(%rdx)
+ mov %r11, 0x50(%rdx)
+ mov %r12, 0x58(%rdx)
+ mov %r13, 0x60(%rdx)
+ mov %rcx, 0x68(%rdx)
+
+ pop %r13
+.cfi_adjust_cfa_offset -8
+ pop %r12
+.cfi_adjust_cfa_offset -8
+ ret
+.cfi_endproc
+
+___
+
+# Emits the body shared by both Montgomery reduction variants.
+# Arguments are four pre-generated multiplication snippets that are run in
+# order; each is expected to leave its product in r8 and up (low limb
+# first). After each of the first three snippets the product is added in
+# place into the operand at [rdi] (at offsets 0x18, 0x28 and 0x38) and the
+# carry is rippled into the limbs above; the third step also writes result
+# limbs C0-C1 to [rsi]. After the last snippet the sum is written to [rsi]
+# as C2-C6. The input at [rdi] is therefore modified.
+# Returns the assembly text.
+sub redc_common {
+ my ($mul01, $mul23, $mul45, $mul67)=@_;
+ my $body=<<___;
+ $mul01
+ xor %rcx, %rcx
+ add 0x18(%rdi), %r8
+ adc 0x20(%rdi), %r9
+ adc 0x28(%rdi), %r10
+ adc 0x30(%rdi), %r11
+ adc 0x38(%rdi), %r12
+ adc 0x40(%rdi), %r13
+ adc 0x48(%rdi), %rcx
+ mov %r8, 0x18(%rdi)
+ mov %r9, 0x20(%rdi)
+ mov %r10, 0x28(%rdi)
+ mov %r11, 0x30(%rdi)
+ mov %r12, 0x38(%rdi)
+ mov %r13, 0x40(%rdi)
+ mov %rcx, 0x48(%rdi)
+ mov 0x50(%rdi), %r8
+ mov 0x58(%rdi), %r9
+ mov 0x60(%rdi), %r10
+ mov 0x68(%rdi), %r11
+ adc \$0x0, %r8
+ adc \$0x0, %r9
+ adc \$0x0, %r10
+ adc \$0x0, %r11
+ mov %r8, 0x50(%rdi)
+ mov %r9, 0x58(%rdi)
+ mov %r10, 0x60(%rdi)
+ mov %r11, 0x68(%rdi)
+
+ $mul23
+ xor %rcx, %rcx
+ add 0x28(%rdi), %r8
+ adc 0x30(%rdi), %r9
+ adc 0x38(%rdi), %r10
+ adc 0x40(%rdi), %r11
+ adc 0x48(%rdi), %r12
+ adc 0x50(%rdi), %r13
+ adc 0x58(%rdi), %rcx
+ mov %r8, 0x28(%rdi)
+ mov %r9, 0x30(%rdi)
+ mov %r10, 0x38(%rdi)
+ mov %r11, 0x40(%rdi)
+ mov %r12, 0x48(%rdi)
+ mov %r13, 0x50(%rdi)
+ mov %rcx, 0x58(%rdi)
+ mov 0x60(%rdi), %r8
+ mov 0x68(%rdi), %r9
+ adc \$0x0, %r8
+ adc \$0x0, %r9
+ mov %r8, 0x60(%rdi)
+ mov %r9, 0x68(%rdi)
+
+ $mul45
+ xor %rcx, %rcx
+ add 0x38(%rdi), %r8
+ adc 0x40(%rdi), %r9
+ adc 0x48(%rdi), %r10
+ adc 0x50(%rdi), %r11
+ adc 0x58(%rdi), %r12
+ adc 0x60(%rdi), %r13
+ adc 0x68(%rdi), %rcx
+ mov %r8, 0x0(%rsi) # C0
+ mov %r9, 0x8(%rsi) # C1
+ mov %r10, 0x48(%rdi)
+ mov %r11, 0x50(%rdi)
+ mov %r12, 0x58(%rdi)
+ mov %r13, 0x60(%rdi)
+ mov %rcx, 0x68(%rdi)
+
+ $mul67
+ add 0x48(%rdi), %r8
+ adc 0x50(%rdi), %r9
+ adc 0x58(%rdi), %r10
+ adc 0x60(%rdi), %r11
+ adc 0x68(%rdi), %r12
+ mov %r8, 0x10(%rsi) # C2
+ mov %r9, 0x18(%rsi) # C3
+ mov %r10, 0x20(%rsi) # C4
+ mov %r11, 0x28(%rsi) # C5
+ mov %r12, 0x30(%rsi) # C6
+___
+ return $body;
+}
+
+# Optimized Montgomery reduction for CPUs, based on method described
+# in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015.
+# Operation: c [rsi] = a [rdi]
+# NOTE: a=c is not allowed
+#
+# Emits two code paths: the public entry point ${PREFIX}_fprdc, which uses
+# MUL, and a local label .Lrdc_bdw, which uses MULX/ADOX/ADCX and is jumped
+# to from the entry point when alt_impl's CPU check passes. Both paths pop
+# the r12-r15 that the entry point pushed.
+# Returns the assembly text.
+sub sike_rdc {
+ # "my ... if (cond)" has undefined behaviour in Perl, so the optional
+ # pieces are selected with a conditional expression instead.
+ my $jump_redc_bdw = $bmi2_adx ? &alt_impl(".Lrdc_bdw") : "";
+ # a[0-1] x .Lp434p1 --> result: r8:r13
+ my $mulx1=&mulx128x256( 0,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
+ # a[2-3] x .Lp434p1 --> result: r8:r13
+ my $mulx2=&mulx128x256(16,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
+ # a[4-5] x .Lp434p1 --> result: r8:r13
+ my $mulx3=&mulx128x256(32,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)),"%rcx");
+ # a[6] x .Lp434p1 --> result: r8:r12
+ my $mulx4=&mulx64x256( 48,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)));
+
+ # a[0-1] x .Lp434p1 --> result: r8:r13
+ my $mul1=&mul128x256( 0,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
+ # a[2-3] x .Lp434p1 --> result: r8:r13
+ my $mul2=&mul128x256(16,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
+ # a[4-5] x .Lp434p1 --> result: r8:r13
+ my $mul3=&mul128x256(32,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..14)),"%rcx");
+ # a[6] x .Lp434p1 --> result: r8:r12
+ my $mul4=&mul64x256( 48,"%rdi",".Lp434p1(%rip)",map("%r$_",(8..13)));
+
+ my $redc_mul=&redc_common($mul1, $mul2, $mul3, $mul4);
+ my $redc_bdw = $bmi2_adx ? &redc_common($mulx1, $mulx2, $mulx3, $mulx4) : "";
+
+ # REDC for Broadwell CPUs
+ my $code=<<___;
+ .Lrdc_bdw:
+ .cfi_startproc
+ # sike_fprdc has already pushed r12--15 by this point.
+ .cfi_adjust_cfa_offset 32
+ .cfi_offset r12, -16
+ .cfi_offset r13, -24
+ .cfi_offset r14, -32
+ .cfi_offset r15, -40
+
+ $redc_bdw
+
+ pop %r15
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r15
+ pop %r14
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r14
+ pop %r13
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r13
+ pop %r12
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r12
+ ret
+ .cfi_endproc
+___
+
+ # REDC for CPUs older than Broadwell
+ $code.=<<___;
+ .globl ${PREFIX}_fprdc
+ .type ${PREFIX}_fprdc,\@function,3
+ ${PREFIX}_fprdc:
+ .cfi_startproc
+ push %r12
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r12, -16
+ push %r13
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r13, -24
+ push %r14
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r14, -32
+ push %r15
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r15, -40
+
+ # Jump to optimized implementation if
+ # CPU supports ADCX/ADOX/MULX
+ $jump_redc_bdw
+ # Otherwise use generic implementation
+ $redc_mul
+
+ pop %r15
+ .cfi_adjust_cfa_offset -8
+ pop %r14
+ .cfi_adjust_cfa_offset -8
+ pop %r13
+ .cfi_adjust_cfa_offset -8
+ pop %r12
+ .cfi_adjust_cfa_offset -8
+ ret
+ .cfi_endproc
+___
+ return $code;
+}
+$code.=&sike_rdc();
+
+# 434-bit multiplication using Karatsuba (one level),
+# schoolbook (one level). Uses MULX/ADOX/ADCX instructions
+# available on Broadwell micro-architectures and newer.
+#
+# Emits only the function body: it expects a [rdi], b [rsi], c [rdx] and
+# r12-r15 already saved by the caller's prologue. rbx and rbp are pushed
+# and popped here, and 96 bytes of stack are used as scratch:
+# 0x00-0x3f for (AH+AL) and (BH+BL) and then their product, 0x40-0x5f for
+# the masked cross terms. Returns the assembly text.
+sub mul_bdw {
+ # [rsp] <- (AH+AL) x (BH+BL)
+ my $mul256_low=&mul256(0,"%rsp",32,"%rsp",0,"%rsp",map("%r$_",(8..15)),"%rbx","%rbp");
+ # [rcx] <- AL x BL
+ my $mul256_albl=&mul256(0,"%rdi",0,"%rsi",0,"%rcx",map("%r$_",(8..15)),"%rbx","%rbp");
+ # [rcx+64] <- AH x BH
+ my $mul192_ahbh=&mul192(32,"%rdi",32,"%rsi",64,"%rcx",map("%r$_",(8..14)));
+
+ my $body=<<___;
+
+ mov %rdx, %rcx
+ xor %rax, %rax
+
+ # r8-r11 <- AH + AL, rax <- mask
+ mov 0x0(%rdi), %r8
+ mov 0x8(%rdi), %r9
+ mov 0x10(%rdi), %r10
+ mov 0x18(%rdi), %r11
+
+ push %rbx
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset rbx, -48
+ push %rbp
+ .cfi_offset rbp, -56
+ .cfi_adjust_cfa_offset 8
+ sub \$96, %rsp
+ .cfi_adjust_cfa_offset 96
+
+ add 0x20(%rdi), %r8
+ adc 0x28(%rdi), %r9
+ adc 0x30(%rdi), %r10
+ adc \$0x0, %r11
+ sbb \$0x0, %rax
+ mov %r8, 0x0(%rsp)
+ mov %r9, 0x8(%rsp)
+ mov %r10, 0x10(%rsp)
+ mov %r11, 0x18(%rsp)
+
+ # r12-r15 <- BH + BL, rbx <- mask
+ xor %rbx, %rbx
+ mov 0x0(%rsi), %r12
+ mov 0x8(%rsi), %r13
+ mov 0x10(%rsi), %r14
+ mov 0x18(%rsi), %r15
+ add 0x20(%rsi), %r12
+ adc 0x28(%rsi), %r13
+ adc 0x30(%rsi), %r14
+ adc \$0x0, %r15
+ sbb \$0x0, %rbx
+ mov %r12, 0x20(%rsp)
+ mov %r13, 0x28(%rsp)
+ mov %r14, 0x30(%rsp)
+ mov %r15, 0x38(%rsp)
+
+ # r12-r15 <- masked (BH + BL)
+ and %rax, %r12
+ and %rax, %r13
+ and %rax, %r14
+ and %rax, %r15
+
+ # r8-r11 <- masked (AH + AL)
+ and %rbx, %r8
+ and %rbx, %r9
+ and %rbx, %r10
+ and %rbx, %r11
+
+ # r8-r11 <- masked (AH + AL) + masked (BH + BL)
+ add %r12, %r8
+ adc %r13, %r9
+ adc %r14, %r10
+ adc %r15, %r11
+ mov %r8, 0x40(%rsp)
+ mov %r9, 0x48(%rsp)
+ mov %r10, 0x50(%rsp)
+ mov %r11, 0x58(%rsp)
+
+ # [rsp] <- CM = (AH+AL) x (BH+BL)
+ $mul256_low
+ # [rcx] <- CL = AL x BL (Result c0-c3)
+ $mul256_albl
+ # [rcx+64] <- CH = AH x BH
+ $mul192_ahbh
+
+ # r8-r11 <- (AH+AL) x (BH+BL), final step
+ mov 0x40(%rsp), %r8
+ mov 0x48(%rsp), %r9
+ mov 0x50(%rsp), %r10
+ mov 0x58(%rsp), %r11
+
+ mov 0x20(%rsp), %rax
+ add %rax, %r8
+ mov 0x28(%rsp), %rax
+ adc %rax, %r9
+ mov 0x30(%rsp), %rax
+ adc %rax, %r10
+ mov 0x38(%rsp), %rax
+ adc %rax, %r11
+
+ # r12-r15, r8-r11 <- (AH+AL) x (BH+BL) - ALxBL
+ mov 0x0(%rsp), %r12
+ mov 0x8(%rsp), %r13
+ mov 0x10(%rsp), %r14
+ mov 0x18(%rsp), %r15
+ sub 0x0(%rcx), %r12
+ sbb 0x8(%rcx), %r13
+ sbb 0x10(%rcx), %r14
+ sbb 0x18(%rcx), %r15
+ sbb 0x20(%rcx), %r8
+ sbb 0x28(%rcx), %r9
+ sbb 0x30(%rcx), %r10
+ sbb 0x38(%rcx), %r11
+
+ # r8-r15 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+ sub 0x40(%rcx), %r12
+ sbb 0x48(%rcx), %r13
+ sbb 0x50(%rcx), %r14
+ sbb 0x58(%rcx), %r15
+ sbb 0x60(%rcx), %r8
+ sbb 0x68(%rcx), %r9
+ sbb \$0x0, %r10
+ sbb \$0x0, %r11
+
+ add 0x20(%rcx), %r12
+ mov %r12, 0x20(%rcx) # Result C4-C7
+ adc 0x28(%rcx), %r13
+ mov %r13, 0x28(%rcx)
+ adc 0x30(%rcx), %r14
+ mov %r14, 0x30(%rcx)
+ adc 0x38(%rcx), %r15
+ mov %r15, 0x38(%rcx)
+ adc 0x40(%rcx), %r8
+ mov %r8, 0x40(%rcx) # Result C8-C15
+ adc 0x48(%rcx), %r9
+ mov %r9, 0x48(%rcx)
+ adc 0x50(%rcx), %r10
+ mov %r10, 0x50(%rcx)
+ adc 0x58(%rcx), %r11
+ mov %r11, 0x58(%rcx)
+ mov 0x60(%rcx), %r12
+ adc \$0x0, %r12
+ mov %r12, 0x60(%rcx)
+ mov 0x68(%rcx), %r13
+ adc \$0x0, %r13
+ mov %r13, 0x68(%rcx)
+
+ add \$96, %rsp
+ .cfi_adjust_cfa_offset -96
+ pop %rbp
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value rbp
+ pop %rbx
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value rbx
+___
+ return $body;
+}
+
+# 434-bit multiplication using Karatsuba (one level),
+# schoolbook (one level).
+#
+# Emits only the function body: it expects a [rdi], b [rsi], c [rdx] and
+# r12-r15 already saved by the caller's prologue. 112 bytes of stack are
+# used as scratch: 0x00-0x3f holds (AH+AL)*(BH+BL), 0x40 and 0x48 hold the
+# carry masks of AH+AL and BH+BL, and 0x50-0x6f holds the corrected high
+# half of the middle product. The first 32 bytes of the output buffer are
+# used as temporary storage for AH+AL, which is why the output must not
+# alias an input. Returns the assembly text.
+sub mul {
+ my $code=<<___;
+ mov %rdx, %rcx
+
+ sub \$112, %rsp # Allocating space in stack
+ .cfi_adjust_cfa_offset 112
+
+ # rcx[0-3] <- AH+AL
+ xor %rax, %rax
+ mov 0x20(%rdi), %r8
+ mov 0x28(%rdi), %r9
+ mov 0x30(%rdi), %r10
+ xor %r11, %r11
+ add 0x0(%rdi), %r8
+ adc 0x8(%rdi), %r9
+ adc 0x10(%rdi), %r10
+ adc 0x18(%rdi), %r11
+ # store AH+AL mask
+ sbb \$0, %rax
+ mov %rax, 0x40(%rsp)
+ # store AH+AL in 0-0x18(rcx)
+ mov %r8, 0x0(%rcx)
+ mov %r9, 0x8(%rcx)
+ mov %r10, 0x10(%rcx)
+ mov %r11, 0x18(%rcx)
+
+ # r12-r15 <- BH+BL
+ xor %rdx, %rdx
+ mov 0x20(%rsi), %r12
+ mov 0x28(%rsi), %r13
+ mov 0x30(%rsi), %r14
+ xor %r15, %r15
+ add 0x0(%rsi), %r12
+ adc 0x8(%rsi), %r13
+ adc 0x10(%rsi), %r14
+ adc 0x18(%rsi), %r15
+ sbb \$0x0, %rdx
+ # store BH+BL mask
+ mov %rdx, 0x48(%rsp)
+
+ # (rsp[0-0x38]) <- (AH+AL)*(BH+BL)
+ mov (%rcx), %rax
+ mul %r12
+ mov %rax, (%rsp) # c0
+ mov %rdx, %r8
+
+ xor %r9, %r9
+ mov (%rcx), %rax
+ mul %r13
+ add %rax, %r8
+ adc %rdx, %r9
+
+ xor %r10, %r10
+ mov 0x8(%rcx), %rax
+ mul %r12
+ add %rax, %r8
+ mov %r8, 0x8(%rsp) # c1
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ xor %r8, %r8
+ mov (%rcx), %rax
+ mul %r14
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x10(%rcx), %rax
+ mul %r12
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x8(%rcx), %rax
+ mul %r13
+ add %rax, %r9
+ mov %r9, 0x10(%rsp) # c2
+ adc %rdx, %r10
+ adc \$0x0, %r8
+
+ xor %r9, %r9
+ mov (%rcx),%rax
+ mul %r15
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov 0x18(%rcx), %rax
+ mul %r12
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov 0x8(%rcx), %rax
+ mul %r14
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov 0x10(%rcx), %rax
+ mul %r13
+ add %rax, %r10
+ mov %r10, 0x18(%rsp) # c3
+ adc %rdx, %r8
+ adc \$0x0, %r9
+
+ xor %r10, %r10
+ mov 0x8(%rcx), %rax
+ mul %r15
+ add %rax, %r8
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ mov 0x18(%rcx), %rax
+ mul %r13
+ add %rax, %r8
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ mov 0x10(%rcx), %rax
+ mul %r14
+ add %rax, %r8 # c4
+ mov %r8, 0x20(%rsp)
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ xor %r11, %r11
+ mov 0x10(%rcx), %rax
+ mul %r15
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r11
+
+ mov 0x18(%rcx), %rax
+ mul %r14
+ add %rax, %r9 # c5
+ mov %r9, 0x28(%rsp)
+ adc %rdx, %r10
+ adc \$0x0,%r11
+
+ mov 0x18(%rcx), %rax
+ mul %r15
+ add %rax, %r10 # c6
+ mov %r10, 0x30(%rsp)
+ adc %rdx, %r11 # c7
+ mov %r11, 0x38(%rsp)
+
+ # r12-r15 <- masked (BH + BL)
+ mov 0x40(%rsp), %rax
+ and %rax, %r12
+ and %rax, %r13
+ and %rax, %r14
+ and %rax, %r15
+
+ # r8-r11 <- masked (AH + AL)
+ mov 0x48(%rsp),%rax
+ mov 0x00(%rcx), %r8
+ and %rax, %r8
+ mov 0x08(%rcx), %r9
+ and %rax, %r9
+ mov 0x10(%rcx), %r10
+ and %rax, %r10
+ mov 0x18(%rcx), %r11
+ and %rax, %r11
+
+ # r12-r15 <- masked (AH + AL) + masked (BH + BL)
+ add %r8, %r12
+ adc %r9, %r13
+ adc %r10, %r14
+ adc %r11, %r15
+
+ # rsp[0x20-0x38] <- (AH+AL) x (BH+BL) high
+ mov 0x20(%rsp), %rax
+ add %rax, %r12
+ mov 0x28(%rsp), %rax
+ adc %rax, %r13
+ mov 0x30(%rsp), %rax
+ adc %rax, %r14
+ mov 0x38(%rsp), %rax
+ adc %rax, %r15
+ mov %r12, 0x50(%rsp)
+ mov %r13, 0x58(%rsp)
+ mov %r14, 0x60(%rsp)
+ mov %r15, 0x68(%rsp)
+
+ # [rcx] <- CL = AL x BL
+ mov (%rdi), %r11
+ mov (%rsi), %rax
+ mul %r11
+ xor %r9, %r9
+ mov %rax, (%rcx) # c0
+ mov %rdx, %r8
+
+ mov 0x10(%rdi), %r14
+ mov 0x8(%rsi), %rax
+ mul %r11
+ xor %r10, %r10
+ add %rax, %r8
+ adc %rdx, %r9
+
+ mov 0x8(%rdi), %r12
+ mov (%rsi), %rax
+ mul %r12
+ add %rax, %r8
+ mov %r8, 0x8(%rcx) # c1
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ xor %r8, %r8
+ mov 0x10(%rsi), %rax
+ mul %r11
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov (%rsi),%r13
+ mov %r14, %rax
+ mul %r13
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x8(%rsi), %rax
+ mul %r12
+ add %rax, %r9
+ mov %r9, 0x10(%rcx) # c2
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ xor %r9, %r9
+ mov 0x18(%rsi), %rax
+ mul %r11
+ mov 0x18(%rdi), %r15
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov %r15, %rax
+ mul %r13
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov 0x10(%rsi), %rax
+ mul %r12
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ mov 0x8(%rsi), %rax
+ mul %r14
+ add %rax, %r10
+ mov %r10, 0x18(%rcx) # c3
+ adc %rdx, %r8
+ adc \$0x0,%r9
+
+ xor %r10, %r10
+ mov 0x18(%rsi), %rax
+ mul %r12
+ add %rax, %r8
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ mov 0x8(%rsi), %rax
+ mul %r15
+ add %rax, %r8
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ mov 0x10(%rsi), %rax
+ mul %r14
+ add %rax, %r8
+ mov %r8, 0x20(%rcx) # c4
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ xor %r8, %r8
+ mov 0x18(%rsi), %rax
+ mul %r14
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x10(%rsi), %rax
+ mul %r15
+ add %rax, %r9
+ mov %r9, 0x28(%rcx) # c5
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x18(%rsi), %rax
+ mul %r15
+ add %rax, %r10
+ mov %r10, 0x30(%rcx) # c6
+ adc %rdx, %r8
+ mov %r8, 0x38(%rcx) # c7
+
+ # rcx[0x40-0x68] <- AH*BH
+ # multiplies 2 192-bit numbers A,B
+ mov 0x20(%rdi), %r11
+ mov 0x20(%rsi), %rax
+ mul %r11
+ xor %r9, %r9
+ mov %rax, 0x40(%rcx) # c0
+ mov %rdx, %r8
+
+ mov 0x30(%rdi), %r14
+ mov 0x28(%rsi), %rax
+ mul %r11
+ xor %r10, %r10
+ add %rax, %r8
+ adc %rdx, %r9
+
+ mov 0x28(%rdi), %r12
+ mov 0x20(%rsi), %rax
+ mul %r12
+ add %rax, %r8
+ mov %r8, 0x48(%rcx) # c1
+ adc %rdx, %r9
+ adc \$0x0,%r10
+
+ xor %r8, %r8
+ mov 0x30(%rsi), %rax
+ mul %r11
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x20(%rsi), %r13
+ mov %r14, %rax
+ mul %r13
+ add %rax, %r9
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x28(%rsi), %rax
+ mul %r12
+ add %rax, %r9
+ mov %r9, 0x50(%rcx) # c2
+ adc %rdx, %r10
+ adc \$0x0,%r8
+
+ mov 0x30(%rsi), %rax
+ mul %r12
+ xor %r12, %r12
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r12
+
+ mov 0x28(%rsi), %rax
+ mul %r14
+ add %rax, %r10
+ adc %rdx, %r8
+ adc \$0x0,%r12
+ mov %r10, 0x58(%rcx) # c3
+
+ mov 0x30(%rsi), %rax
+ mul %r14
+ add %rax, %r8
+ adc \$0x0,%r12
+ mov %r8, 0x60(%rcx) # c4
+
+ add %r12, %rdx # c5
+
+ # [r8-r15] <- (AH+AL)x(BH+BL) - ALxBL
+ mov 0x0(%rsp), %r8
+ sub 0x0(%rcx), %r8
+ mov 0x8(%rsp), %r9
+ sbb 0x8(%rcx), %r9
+ mov 0x10(%rsp), %r10
+ sbb 0x10(%rcx), %r10
+ mov 0x18(%rsp), %r11
+ sbb 0x18(%rcx), %r11
+ mov 0x50(%rsp), %r12
+ sbb 0x20(%rcx), %r12
+ mov 0x58(%rsp), %r13
+ sbb 0x28(%rcx), %r13
+ mov 0x60(%rsp), %r14
+ sbb 0x30(%rcx), %r14
+ mov 0x68(%rsp), %r15
+ sbb 0x38(%rcx), %r15
+
+ # [r8-r15] <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+ mov 0x40(%rcx), %rax
+ sub %rax, %r8
+ mov 0x48(%rcx), %rax
+ sbb %rax, %r9
+ mov 0x50(%rcx), %rax
+ sbb %rax, %r10
+ mov 0x58(%rcx), %rax
+ sbb %rax, %r11
+ mov 0x60(%rcx), %rax
+ sbb %rax, %r12
+ sbb %rdx, %r13
+ sbb \$0x0,%r14
+ sbb \$0x0,%r15
+
+ # Final result
+ add 0x20(%rcx), %r8
+ mov %r8, 0x20(%rcx) # Result C4-C7
+ adc 0x28(%rcx), %r9
+ mov %r9, 0x28(%rcx)
+ adc 0x30(%rcx), %r10
+ mov %r10, 0x30(%rcx)
+ adc 0x38(%rcx), %r11
+ mov %r11, 0x38(%rcx)
+ adc 0x40(%rcx), %r12
+ mov %r12, 0x40(%rcx) # Result C8-C13
+ adc 0x48(%rcx), %r13
+ mov %r13, 0x48(%rcx)
+ adc 0x50(%rcx), %r14
+ mov %r14, 0x50(%rcx)
+ adc 0x58(%rcx), %r15
+ mov %r15, 0x58(%rcx)
+ mov 0x60(%rcx), %r12
+ adc \$0x0, %r12
+ mov %r12, 0x60(%rcx)
+ adc \$0x0, %rdx
+ mov %rdx, 0x68(%rcx)
+
+ add \$112, %rsp # Restoring space in stack
+ .cfi_adjust_cfa_offset -112
+___
+
+ return $code;
+}
+
+# Integer multiplication based on Karatsuba method
+# Operation: c [rdx] = a [rdi] * b [rsi]
+# NOTE: a=c or b=c are not allowed
+#
+# Emits two code paths: the public entry point ${PREFIX}_mpmul, which uses
+# MUL, and a local label .Lmul_bdw, which uses MULX/ADOX/ADCX and is jumped
+# to from the entry point when alt_impl's CPU check passes. Both paths pop
+# the r12-r15 that the entry point pushed.
+# Returns the assembly text.
+sub sike_mul {
+ # "my ... if (cond)" has undefined behaviour in Perl, so the optional
+ # pieces are selected with a conditional expression instead.
+ my $jump_mul_bdw = $bmi2_adx ? &alt_impl(".Lmul_bdw") : "";
+ # MUL for Broadwell CPUs
+ my $mul_bdw = $bmi2_adx ? &mul_bdw() : "";
+ # MUL for CPUs older than Broadwell
+ my $mul=&mul();
+
+ my $body=<<___;
+ .Lmul_bdw:
+ .cfi_startproc
+ # sike_mpmul has already pushed r12--15 by this point.
+ .cfi_adjust_cfa_offset 32
+ .cfi_offset r12, -16
+ .cfi_offset r13, -24
+ .cfi_offset r14, -32
+ .cfi_offset r15, -40
+
+ $mul_bdw
+
+ pop %r15
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r15
+ pop %r14
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r14
+ pop %r13
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r13
+ pop %r12
+ .cfi_adjust_cfa_offset -8
+ .cfi_same_value r12
+ ret
+ .cfi_endproc
+
+ .globl ${PREFIX}_mpmul
+ .type ${PREFIX}_mpmul,\@function,3
+ ${PREFIX}_mpmul:
+ .cfi_startproc
+ push %r12
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r12, -16
+ push %r13
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r13, -24
+ push %r14
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r14, -32
+ push %r15
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset r15, -40
+
+ # Jump to optimized implementation if
+ # CPU supports ADCX/ADOX/MULX
+ $jump_mul_bdw
+ # Otherwise use generic implementation
+ $mul
+
+ pop %r15
+ .cfi_adjust_cfa_offset -8
+ pop %r14
+ .cfi_adjust_cfa_offset -8
+ pop %r13
+ .cfi_adjust_cfa_offset -8
+ pop %r12
+ .cfi_adjust_cfa_offset -8
+ ret
+ .cfi_endproc
+
+___
+ return $body;
+}
+
+$code.=&sike_mul();
+
+# Expand every backtick-quoted span by evaluating it as Perl, then emit the
+# generated code line by line.
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/ge;
+ print $_,"\n";
+}
+
+# STDOUT is the pipe to the translator; closing it waits for the translator,
+# so a failed close means the output is incomplete or was rejected.
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/third_party/sike/asm/fp_generic.c b/src/third_party/sike/asm/fp_generic.c
new file mode 100644
index 00000000..991ad1e1
--- /dev/null
+++ b/src/third_party/sike/asm/fp_generic.c
@@ -0,0 +1,181 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: portable modular arithmetic for P434
+*********************************************************************************************/
+
+#include <openssl/base.h>
+
+#if defined(OPENSSL_NO_ASM) || \
+ (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64))
+
+#include "../utils.h"
+#include "../fpx.h"
+
+// Global constants
+extern const struct params_t sike_params;
+
+static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] = low digit, c[1] = high digit of a*b.
+ crypto_word_t al, ah, bl, bh, temp;
+ crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+ crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4); // lower half of the bits set
+ crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4); // upper half of the bits set
+
+ al = a & mask_low; // Low part
+ ah = a >> (sizeof(crypto_word_t) * 4); // High part
+ bl = b & mask_low;
+ bh = b >> (sizeof(crypto_word_t) * 4);
+
+ albl = al*bl; // Four half-digit partial products; each fits in one digit
+ albh = al*bh;
+ ahbl = ah*bl;
+ ahbh = ah*bh;
+ c[0] = albl & mask_low; // C00
+
+ res1 = albl >> (sizeof(crypto_word_t) * 4);
+ res2 = ahbl & mask_low;
+ res3 = albh & mask_low;
+ temp = res1 + res2 + res3;
+ carry = temp >> (sizeof(crypto_word_t) * 4); // overflow of the second half-digit column
+ c[0] ^= temp << (sizeof(crypto_word_t) * 4); // C01
+
+ res1 = ahbl >> (sizeof(crypto_word_t) * 4);
+ res2 = albh >> (sizeof(crypto_word_t) * 4);
+ res3 = ahbh & mask_low;
+ temp = res1 + res2 + res3 + carry;
+ c[1] = temp & mask_low; // C10
+ carry = temp & mask_high; // kept in the upper half, i.e. already aligned with C11
+ c[1] ^= (ahbh & mask_high) + carry; // C11
+}
+
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
+{ // Modular addition, c = a+b mod p434.
+ // Inputs: a, b in [0, 2*p434-1]
+ // Output: c in [0, 2*p434-1]
+ unsigned int i, carry = 0;
+ crypto_word_t mask;
+
+ for (i = 0; i < NWORDS_FIELD; i++) { // c = a + b
+ ADDC(carry, a[i], b[i], carry, c[i]);
+ }
+
+ carry = 0;
+ for (i = 0; i < NWORDS_FIELD; i++) { // c = c - 2*p434; carry ends up holding the borrow
+ SUBC(carry, c[i], sike_params.prime_x2[i], carry, c[i]);
+ }
+ mask = 0 - (crypto_word_t)carry; // all-ones if the subtraction borrowed, zero otherwise
+
+ carry = 0;
+ for (i = 0; i < NWORDS_FIELD; i++) { // branch-free add-back of 2*p434 when it borrowed
+ ADDC(carry, c[i], sike_params.prime_x2[i] & mask, carry, c[i]);
+ }
+}
+
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
+{ // Modular subtraction, c = a-b mod p434.
+ // Inputs: a, b in [0, 2*p434-1]
+ // Output: c in [0, 2*p434-1]
+ unsigned int i, borrow = 0;
+ crypto_word_t mask;
+
+ for (i = 0; i < NWORDS_FIELD; i++) { // c = a - b; borrow is set when a < b
+ SUBC(borrow, a[i], b[i], borrow, c[i]);
+ }
+ mask = 0 - (crypto_word_t)borrow; // all-ones if the subtraction borrowed, zero otherwise
+
+ borrow = 0;
+ for (i = 0; i < NWORDS_FIELD; i++) { // branch-free add-back of 2*p434 when it borrowed
+ ADDC(borrow, c[i], sike_params.prime_x2[i] & mask, borrow, c[i]);
+ }
+}
+
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD; c receives 2*NWORDS_FIELD words.
+ unsigned int i, j;
+ crypto_word_t t = 0, u = 0, v = 0, UV[2]; // (t,u,v) is the three-word column accumulator, v lowest
+ unsigned int carry = 0;
+
+ for (i = 0; i < NWORDS_FIELD; i++) { // lower columns: all pairs with j + (i-j) = i
+ for (j = 0; j <= i; j++) {
+ MUL(a[j], b[i-j], UV+1, UV[0]);
+ ADDC(0, UV[0], v, carry, v);
+ ADDC(carry, UV[1], u, carry, u);
+ t += carry;
+ }
+ c[i] = v; // emit the finished column and shift the accumulator down one word
+ v = u;
+ u = t;
+ t = 0;
+ }
+
+ for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { // upper columns: j starts where i-j is still a valid index of b
+ for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+ MUL(a[j], b[i-j], UV+1, UV[0]);
+ ADDC(0, UV[0], v, carry, v);
+ ADDC(carry, UV[1], u, carry, u);
+ t += carry;
+ }
+ c[i] = v;
+ v = u;
+ u = t;
+ t = 0;
+ }
+ c[2*NWORDS_FIELD-1] = v; // top word is whatever remains in the accumulator
+}
+
+void sike_fprdc(dfelm_t ma, felm_t mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
+ // mc = ma*R^-1 mod p434x2, where R = 2^448.
+ // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+ // ma is a double-width (2*NWORDS_FIELD words) value, assumed to be in Montgomery representation.
+ unsigned int i, j, carry, count = ZERO_WORDS;
+ crypto_word_t UV[2], t = 0, u = 0, v = 0; // (t,u,v) is the three-word column accumulator, v lowest
+
+ for (i = 0; i < NWORDS_FIELD; i++) {
+ mc[i] = 0;
+ }
+
+ for (i = 0; i < NWORDS_FIELD; i++) { // lower columns: the results are stored in mc and reused as multipliers
+ for (j = 0; j < i; j++) {
+ if (j < (i-ZERO_WORDS+1)) { // NOTE(review): unsigned, so this wraps and passes for small i; presumably harmless because the low words of prime_p1 are zero - confirm
+ MUL(mc[j], sike_params.prime_p1[i-j], UV+1, UV[0]);
+ ADDC(0, UV[0], v, carry, v);
+ ADDC(carry, UV[1], u, carry, u);
+ t += carry;
+ }
+ }
+ ADDC(0, v, ma[i], carry, v);
+ ADDC(carry, u, 0, carry, u);
+ t += carry;
+ mc[i] = v;
+ v = u;
+ u = t;
+ t = 0;
+ }
+
+ for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { // upper columns: read the high half of ma, overwrite mc from word 0
+ if (count > 0) {
+ count -= 1;
+ }
+ for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+ if (j < (NWORDS_FIELD-count)) {
+ MUL(mc[j], sike_params.prime_p1[i-j], UV+1, UV[0]);
+ ADDC(0, UV[0], v, carry, v);
+ ADDC(carry, UV[1], u, carry, u);
+ t += carry;
+ }
+ }
+ ADDC(0, v, ma[i], carry, v);
+ ADDC(carry, u, 0, carry, u);
+ t += carry;
+ mc[i-NWORDS_FIELD] = v;
+ v = u;
+ u = t;
+ t = 0;
+ }
+ ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); // last word of ma: this is why ma must be double width
+ mc[NWORDS_FIELD-1] = v;
+}
+
+#endif // NO_ASM || (!X86_64 && !AARCH64)
diff --git a/src/third_party/sike/curve_params.c b/src/third_party/sike/curve_params.c
new file mode 100644
index 00000000..a1fbb3f1
--- /dev/null
+++ b/src/third_party/sike/curve_params.c
@@ -0,0 +1,128 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P434
+*********************************************************************************************/
+
+#include "utils.h"
+
+// Parameters for isogeny system "SIKE" (P434). Multi-word values are stored least-significant word first.
+const struct params_t sike_params = {
+ .prime = { // p434
+ U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+ U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
+ U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+ U64_TO_WORDS(0x0002341F27177344)
+ },
+ .prime_p1 = { // p434 + 1
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
+ U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+ U64_TO_WORDS(0x0002341F27177344)
+ },
+ .prime_x2 = { // 2 * p434
+ U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+ U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
+ U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
+ U64_TO_WORDS(0x0004683E4E2EE688)
+ },
+ .A_gen = {
+ U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
+ U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
+ U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
+ U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
+ U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
+ U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
+ U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
+ U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
+ U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
+ U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
+ U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
+ U64_TO_WORDS(0x0001C4CB77542876), // XQA0
+ U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
+ U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
+ U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
+ U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
+ U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
+ U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
+ U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
+ U64_TO_WORDS(0x00022A81D8D55643), // XRA0
+ U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
+ U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
+ U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
+ U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1
+ },
+ .B_gen = {
+ U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
+ U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
+ U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
+ U64_TO_WORDS(0x0001BED4772E551F), // XPB0
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), // XPB1
+ U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
+ U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
+ U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
+ U64_TO_WORDS(0x000034080181D8AE), // XQB0
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), // XQB1
+ U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
+ U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
+ U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
+ U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
+ U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
+ U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
+ U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
+ U64_TO_WORDS(0x000173FA910377D3) // XRB1
+ },
+ .mont_R2 = { // multiplier used by sike_to_fp2mont to enter Montgomery form (R^2)
+ U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
+ U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
+ U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
+ U64_TO_WORDS(0x000025A89BCDD12A)
+ },
+ .mont_one = {
+ U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
+ U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
+ U64_TO_WORDS(0x0000ECEEA7BD2EDA)
+ },
+ .mont_six = {
+ U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
+ U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
+ U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
+ U64_TO_WORDS(0x00012559A0403298)
+ },
+ .A_strat = {
+ 0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+ 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
+ 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
+ 0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
+ 0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
+ 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
+ 0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+ 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
+ 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
+ 0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
+ 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
+ },
+ .B_strat = {
+ 0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
+ 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
+ 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
+ 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
+ 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+ 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
+ 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+ 0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
+ 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+ 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
+ 0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
+ 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+ 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+ 0x02, 0x01, 0x01, 0x02, 0x01, 0x01
+ }
+};
diff --git a/src/third_party/sike/fpx.c b/src/third_party/sike/fpx.c
new file mode 100644
index 00000000..9917116c
--- /dev/null
+++ b/src/third_party/sike/fpx.c
@@ -0,0 +1,283 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: core functions over GF(p) and GF(p^2)
+*********************************************************************************************/
+#include <openssl/base.h>
+
+#include "utils.h"
+#include "fpx.h"
+
+extern const struct params_t sike_params;
+
+// Field squaring using Montgomery arithmetic: full product ma*ma followed by Montgomery reduction into mc.
+static void fpsqr_mont(const felm_t ma, felm_t mc)
+{
+ dfelm_t temp = {0}; // double-width product
+ sike_mpmul(ma, ma, temp);
+ sike_fprdc(temp, mc);
+}
+
+// Fixed square-and-multiply chain computing a = a^((p-3)/4) in place, using Montgomery arithmetic.
+static void fpinv_chain_mont(felm_t a)
+{
+ unsigned int i, j;
+ felm_t t[31], tt;
+
+ // Precomputed table of odd powers: t[k] = a^(2k+3) for k = 0..30
+ fpsqr_mont(a, tt);
+ sike_fpmul_mont(a, tt, t[0]);
+ for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);
+
+ sike_fpcopy(a, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[5], tt, tt);
+ for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[14], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[3], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[23], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[13], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[24], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[7], tt, tt);
+ for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[12], tt, tt);
+ for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[30], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[1], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[30], tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[21], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[2], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[19], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[1], tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[24], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[26], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[16], tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[10], tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[6], tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[0], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[20], tt, tt);
+ for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[9], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[25], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[30], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[26], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(a, tt, tt);
+ for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[28], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[6], tt, tt);
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[10], tt, tt);
+ for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[22], tt, tt);
+ for (j = 0; j < 35; j++) { // tail of the exponent: 35 identical windows (six squarings, then multiply by t[30])
+ for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+ sike_fpmul_mont(t[30], tt, tt);
+ }
+ sike_fpcopy(tt, a);
+}
+
+// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p, computed as a^(p-2).
+static void fpinv_mont(felm_t a)
+{
+ felm_t tt = {0};
+ sike_fpcopy(a, tt);
+ fpinv_chain_mont(tt); // tt = a^((p-3)/4)
+ fpsqr_mont(tt, tt);
+ fpsqr_mont(tt, tt); // two squarings: tt = a^(p-3)
+ sike_fpmul_mont(a, tt, a); // a = a * a^(p-3) = a^(p-2)
+}
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
+#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64))
+inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+ uint8_t carry = 0; // carry bit chained from word to word, least-significant word first
+ for (size_t i = 0; i < nwords; i++) {
+ ADDC(carry, a[i], b[i], carry, c[i]);
+ }
+ return carry; // carry out of the most-significant word
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit (set when a < b).
+inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+ uint32_t borrow = 0; // borrow bit chained from word to word, least-significant word first
+ for (size_t i = 0; i < nwords; i++) {
+ SUBC(borrow, a[i], b[i], borrow, c[i]);
+ }
+ return borrow;
+}
+#endif
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = NWORDS_FIELD. No modular reduction; the carry out is discarded.
+inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
+{
+#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64))
+ mp_add(a, b, c, NWORDS_FIELD); // portable path
+#else
+ sike_mpadd_asm(a, b, c); // assembly path on x86-64 / AArch64
+#endif
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
+// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
+inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64))
+ return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); // borrow bit 0/1 stretched to an all-zeros/all-ones mask
+#else
+ return sike_mpsubx2_asm(a, b, c); // assembly path on x86-64 / AArch64
+#endif
+}
+
+// Multiprecision double subtraction, c = c-a-b, where lng(a) = lng(b) = lng(c) = 2*NWORDS_FIELD.
+// Inputs should be s.t. c > a and c > b
+inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64))
+ mp_sub(c, a, c, 2*NWORDS_FIELD); // borrows are ignored
+ mp_sub(c, b, c, 2*NWORDS_FIELD);
+#else
+ sike_mpdblsubx2_asm(a, b, c); // assembly path on x86-64 / AArch64
+#endif
+}
+
+// Copy a field element (NWORDS_FIELD words), c = a.
+void sike_fpcopy(const felm_t a, felm_t c) {
+ for (size_t i = 0; i < NWORDS_FIELD; i++) {
+ c[i] = a[i];
+ }
+}
+
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime, where R=2^448
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
+{
+ dfelm_t temp = {0}; // double-width product
+ sike_mpmul(ma, mb, temp);
+ sike_fprdc(temp, mc); // Montgomery reduction back to a single field element
+}
+
+// Conversion from Montgomery representation to standard representation,
+// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+void sike_from_mont(const felm_t ma, felm_t c)
+{
+ felm_t one = {0};
+ one[0] = 1; // the plain integer 1, not 1 in Montgomery form
+
+ sike_fpmul_mont(ma, one, c); // multiplying by 1 leaves only the R^-1 factor
+ sike_fpcorrection(c); // reduce to [0, p-1]
+}
+
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
+// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
+ felm_t t1, t2, t3; // all temporaries are computed before c is written
+
+ mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1
+ sike_fpsub(a->c0, a->c1, t2); // t2 = a0-a1
+ mp_addfast(a->c0, a->c0, t3); // t3 = 2a0
+ sike_fpmul_mont(t1, t2, c->c0); // c0 = (a0+a1)(a0-a1)
+ sike_fpmul_mont(t3, a->c1, c->c1); // c1 = 2a0*a1
+}
+
+// Modular negation, a = -a mod p434, computed as 2*p434 - a.
+// Input/output: a in [0, 2*p434-1]
+void sike_fpneg(felm_t a) {
+ uint32_t borrow = 0; // final borrow is not used
+ for (size_t i = 0; i < NWORDS_FIELD; i++) {
+ SUBC(borrow, sike_params.prime_x2[i], a[i], borrow, a[i]);
+ }
+}
+
+// Modular division by two, c = a/2 mod p434.
+// Input : a in [0, 2*p434-1]
+// Output: c in [0, 2*p434-1]
+void sike_fpdiv2(const felm_t a, felm_t c) {
+ uint32_t carry = 0;
+ crypto_word_t mask;
+
+ mask = 0 - (crypto_word_t)(a[0] & 1); // If a is odd compute a+p434 (mask is all-ones), else a+0
+ for (size_t i = 0; i < NWORDS_FIELD; i++) {
+ ADDC(carry, a[i], sike_params.prime[i] & mask, carry, c[i]);
+ }
+
+ // Multiprecision right shift by one.
+ for (size_t i = 0; i < NWORDS_FIELD-1; i++) {
+ c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1)); // low bit of the next word becomes this word's top bit
+ }
+ c[NWORDS_FIELD-1] >>= 1;
+}
+
+// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1].
+void sike_fpcorrection(felm_t a) {
+ uint32_t borrow = 0;
+ crypto_word_t mask;
+
+ for (size_t i = 0; i < NWORDS_FIELD; i++) { // a = a - p434
+ SUBC(borrow, a[i], sike_params.prime[i], borrow, a[i]);
+ }
+ mask = 0 - (crypto_word_t)borrow; // all-ones if a was already below p434
+
+ borrow = 0;
+ for (size_t i = 0; i < NWORDS_FIELD; i++) { // branch-free add-back of p434 in that case
+ ADDC(borrow, a[i], sike_params.prime[i] & mask, borrow, a[i]);
+ }
+}
+
+// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2).
+// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
+ felm_t t1, t2;
+ dfelm_t tt1, tt2, tt3; // double-width products; only three multiplications are performed
+ crypto_word_t mask;
+
+ mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1
+ mp_addfast(b->c0, b->c1, t2); // t2 = b0+b1
+ sike_mpmul(a->c0, b->c0, tt1); // tt1 = a0*b0
+ sike_mpmul(a->c1, b->c1, tt2); // tt2 = a1*b1
+ sike_mpmul(t1, t2, tt3); // tt3 = (a0+a1)*(b0+b1)
+ mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+ mask = mp_subfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1. If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0
+
+ for (size_t i = 0; i < NWORDS_FIELD; i++) { // t1 = p if tt1 went negative, else 0 (branch-free)
+ t1[i] = sike_params.prime[i] & mask;
+ }
+
+ sike_fprdc(tt3, c->c1); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+ mp_addfast(&tt1[NWORDS_FIELD], t1, &tt1[NWORDS_FIELD]); // add t1 into the upper NWORDS_FIELD words of tt1 to undo a negative result
+ sike_fprdc(tt1, c->c0); // c[0] = a0*b0 - a1*b1
+}
+
+// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). Operates on a in place.
+void sike_fp2inv_mont(f2elm_t a) {
+ f2elm_t t1;
+
+ fpsqr_mont(a->c0, t1->c0); // t10 = a0^2
+ fpsqr_mont(a->c1, t1->c1); // t11 = a1^2
+ sike_fpadd(t1->c0, t1->c1, t1->c0); // t10 = a0^2+a1^2
+ fpinv_mont(t1->c0); // t10 = (a0^2+a1^2)^-1
+ sike_fpneg(a->c1); // a = a0-i*a1
+ sike_fpmul_mont(a->c0, t1->c0, a->c0);
+ sike_fpmul_mont(a->c1, t1->c0, a->c1); // a = (a0-i*a1)*(a0^2+a1^2)^-1
+}
diff --git a/src/third_party/sike/fpx.h b/src/third_party/sike/fpx.h
new file mode 100644
index 00000000..e6976885
--- /dev/null
+++ b/src/third_party/sike/fpx.h
@@ -0,0 +1,113 @@
+#ifndef FPX_H_
+#define FPX_H_
+
+#include "utils.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Modular addition, c = a+b mod p.
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
+// Modular subtraction, c = a-b mod p.
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
+// Modular division by two, c = a/2 mod p.
+void sike_fpdiv2(const felm_t a, felm_t c);
+// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
+void sike_fpcorrection(felm_t a);
+// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
+// 434-bit Montgomery reduction, c = a*R^-1 mod p. Buffer 'a' may be modified
+// by the time the call returns.
+void sike_fprdc(dfelm_t a, felm_t c);
+// Double 2x434-bit multiprecision subtraction, c = c-a-b. NOTE(review): callers pass dfelm_t operands; felm_t here looks like it should be dfelm_t.
+void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
+// Multiprecision subtraction, c = a-b. Returns an all-ones mask if the result is negative, zero otherwise.
+crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
+// 434-bit multiprecision addition, c = a+b
+void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
+// Modular negation, a = -a mod p.
+void sike_fpneg(felm_t a);
+// Copy of a field element, c = a
+void sike_fpcopy(const felm_t a, felm_t c);
+// Zero a field element, a = 0. NOTE(review): inferred from the name; the definition is not in this header.
+void sike_fpzero(felm_t a);
+// If option = 0xFF...FF, x and y are swapped; otherwise no swap happens. Constant time.
+void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
+// Conversion from Montgomery representation to standard representation,
+// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+void sike_from_mont(const felm_t ma, felm_t c);
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
+// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2)
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void sike_fp2inv_mont(f2elm_t a);
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
+// Modular correction, a = a in GF(p^2). NOTE(review): a macro of the same name is defined later in this header.
+void sike_fp2correction(f2elm_t a);
+
+#if defined(__cplusplus)
+} // extern C
+#endif
+
+// GF(p^2) addition, c = a+b in GF(p^2). Each argument is expanded twice, so it must be free of side effects.
+#define sike_fp2add(a, b, c) \
+do { \
+ sike_fpadd((a)->c0, (b)->c0, (c)->c0); \
+ sike_fpadd((a)->c1, (b)->c1, (c)->c1); \
+} while(0)
+
+// GF(p^2) subtraction, c = a-b in GF(p^2). Each argument is expanded twice, so it must be free of side effects.
+#define sike_fp2sub(a,b,c) \
+do { \
+ sike_fpsub((a)->c0, (b)->c0, (c)->c0); \
+ sike_fpsub((a)->c1, (b)->c1, (c)->c1); \
+} while(0)
+
+// Copy a GF(p^2) element, c = a. Each argument is expanded twice, so it must be free of side effects.
+#define sike_fp2copy(a, c) \
+do { \
+ sike_fpcopy((a)->c0, (c)->c0); \
+ sike_fpcopy((a)->c1, (c)->c1); \
+} while(0)
+
+// GF(p^2) negation, a = -a in GF(p^2). The argument is expanded twice, so it must be free of side effects.
+#define sike_fp2neg(a) \
+do { \
+ sike_fpneg((a)->c0); \
+ sike_fpneg((a)->c1); \
+} while(0)
+
+// GF(p^2) division by two, c = a/2 in GF(p^2). Each argument is expanded twice, so it must be free of side effects.
+#define sike_fp2div2(a, c) \
+do { \
+ sike_fpdiv2((a)->c0, (c)->c0); \
+ sike_fpdiv2((a)->c1, (c)->c1); \
+} while(0)
+
+// Modular correction in GF(p^2): applies sike_fpcorrection to both coordinates of a. The argument is expanded twice.
+#define sike_fp2correction(a) \
+do { \
+ sike_fpcorrection((a)->c0); \
+ sike_fpcorrection((a)->c1); \
+} while(0)
+
+// Conversion of a GF(p^2) element to Montgomery representation,
+// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). Each argument is expanded twice, so it must be free of side effects.
+#define sike_to_fp2mont(a, mc) \
+ do { \
+ sike_fpmul_mont((a)->c0, sike_params.mont_R2, (mc)->c0); \
+ sike_fpmul_mont((a)->c1, sike_params.mont_R2, (mc)->c1); \
+ } while (0)
+
+// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
+// c_i = ma_i*R^(-1) = a_i in GF(p^2). Each argument is expanded twice, so it must be free of side effects.
+#define sike_from_fp2mont(ma, c) \
+do { \
+ sike_from_mont((ma)->c0, (c)->c0); \
+ sike_from_mont((ma)->c1, (c)->c1); \
+} while(0)
+
+#endif // FPX_H_
diff --git a/src/third_party/sike/isogeny.c b/src/third_party/sike/isogeny.c
new file mode 100644
index 00000000..6b910e02
--- /dev/null
+++ b/src/third_party/sike/isogeny.c
@@ -0,0 +1,260 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: elliptic curve and isogeny functions
+*********************************************************************************************/
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z).
+ // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
+ // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
+ f2elm_t t0, t1;
+
+ sike_fp2sub(P->X, P->Z, t0); // t0 = X1-Z1
+ sike_fp2add(P->X, P->Z, t1); // t1 = X1+Z1
+ sike_fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2
+ sike_fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2
+ sike_fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2
+ sike_fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+ sike_fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2
+ sike_fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
+ sike_fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+ sike_fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+void sike_xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
+{ // Repeated doubling: Q <- [2^e](X:Z) on a Montgomery curve with projective constant.
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP, Montgomery curve constants A+2C and 4C, and the number of doublings e.
+ // Output: projective Montgomery x-coordinates Q <- (2^e)*P. P is copied with memmove, so P and Q may be the same object.
+
+ memmove(Q, P, sizeof(*P));
+ for (size_t remaining = e; remaining != 0; remaining--) {
+ xDBL(Q, Q, A24plus, C24);
+ }
+}
+
+void sike_get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
+{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+ // Input: projective point of order four P = (X4:Z4).
+ // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
+ // that are used to evaluate the isogeny at a point in eval_4_isog().
+
+ sike_fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4
+ sike_fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4
+ sike_fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2
+ sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2
+ sike_fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4
+ sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2
+ sike_fp2sqr_mont(P->X, A24plus); // A24plus = X4^2
+ sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2
+ sike_fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4
+}
+
+void sike_eval_4_isog(point_proj_t P, f2elm_t* coeff)
+{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
+ // by the 3 coefficients in coeff (computed in the function get_4_isog()).
+ // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+ // Output: the projective point P = phi(P) = (X:Z) in the codomain.
+ f2elm_t t0, t1;
+
+ sike_fp2add(P->X, P->Z, t0); // t0 = X+Z
+ sike_fp2sub(P->X, P->Z, t1); // t1 = X-Z
+ sike_fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1]
+ sike_fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2]
+ sike_fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z)
+ sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z)
+ sike_fp2add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
+ sike_fp2sub(P->X, P->Z, P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1]
+ sike_fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+ sike_fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2
+ sike_fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+ sike_fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)
+ sike_fp2mul_mont(P->X, t1, P->X); // Xfinal
+ sike_fp2mul_mont(P->Z, t0, P->Z); // Zfinal
+}
+
+
+void sike_xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
+{ // Tripling of a Montgomery point in projective coordinates (X:Z).
+ // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+ // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
+ f2elm_t t0, t1, t2, t3, t4, t5, t6;
+
+ sike_fp2sub(P->X, P->Z, t0); // t0 = X-Z
+ sike_fp2sqr_mont(t0, t2); // t2 = (X-Z)^2
+ sike_fp2add(P->X, P->Z, t1); // t1 = X+Z
+ sike_fp2sqr_mont(t1, t3); // t3 = (X+Z)^2
+ sike_fp2add(t0, t1, t4); // t4 = 2*X
+ sike_fp2sub(t1, t0, t0); // t0 = 2*Z
+ sike_fp2sqr_mont(t4, t1); // t1 = 4*X^2
+ sike_fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2
+ sike_fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+ sike_fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2
+ sike_fp2mul_mont(t3, t5, t3); // t3 = (X+Z)^2 * t5 = A24plus*(X+Z)^4
+ sike_fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2
+ sike_fp2mul_mont(t2, t6, t2); // t2 = (X-Z)^2 * t6 = A24minus*(X-Z)^4
+ sike_fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+ sike_fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
+ sike_fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+ sike_fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+ sike_fp2sqr_mont(t2, t2); // t2 = t2^2
+ sike_fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2
+ sike_fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+ sike_fp2sqr_mont(t1, t1); // t1 = t1^2
+ sike_fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1
+}
+
+void sike_xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
+{ // Repeated tripling: Q <- [3^e](X:Z) on a Montgomery curve with projective constant.
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP, Montgomery curve constants A24plus = A+2C and A24minus = A-2C, and the number of triplings e.
+ // Output: projective Montgomery x-coordinates Q <- (3^e)*P. P is copied with memmove, so P and Q may be the same object.
+ memmove(Q, P, sizeof(*P));
+ for (size_t remaining = e; remaining != 0; remaining--) {
+ sike_xTPL(Q, Q, A24minus, A24plus);
+ }
+}
+
+void sike_get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
+{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+ // Input: projective point of order three P = (X3:Z3).
+ // Output: the constants A24minus and A24plus of the 3-isogenous Montgomery curve, and coeff[0], coeff[1] for use in eval_3_isog().
+ f2elm_t t0, t1, t2, t3, t4;
+
+ sike_fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z
+ sike_fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2
+ sike_fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z
+ sike_fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2
+ sike_fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2
+ sike_fp2add(coeff[0], coeff[1], t3); // t3 = 2*X
+ sike_fp2sqr_mont(t3, t3); // t3 = 4*X^2
+ sike_fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+ sike_fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2
+ sike_fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2
+ sike_fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
+ sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
+ sike_fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
+ sike_fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+ sike_fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
+ sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
+ sike_fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
+ sike_fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+ sike_fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+ sike_fp2add(A24minus, t0, A24plus); // A24plus = A24minus + t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+}
+
+
+void sike_eval_3_isog(point_proj_t Q, f2elm_t* coeff)
+{ // Evaluates the 3-isogeny phi at the projective point Q = (X:Z), where phi is defined by the
+ // 2 coefficients in coeff (computed in the function get_3_isog() from a point (X3:Z3) of order 3).
+ // Inputs: the projective point Q = (X:Z) and the coefficients coeff[0], coeff[1].
+ // Output: the projective point Q <- phi(Q), written in place.
+ f2elm_t t0, t1, t2;
+
+ sike_fp2add(Q->X, Q->Z, t0); // t0 = X+Z
+ sike_fp2sub(Q->X, Q->Z, t1); // t1 = X-Z
+ sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z)
+ sike_fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z)
+ sike_fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z)
+ sike_fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z)
+ sike_fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
+ sike_fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
+ sike_fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
+ sike_fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
+}
+
+
+void sike_inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
+{ // 3-way simultaneous inversion
+ // Input: z1,z2,z3
+ // Output: 1/z1,1/z2,1/z3 (override inputs).
+ f2elm_t t0, t1, t2, t3;
+
+ sike_fp2mul_mont(z1, z2, t0); // t0 = z1*z2
+ sike_fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3
+ sike_fp2inv_mont(t1); // t1 = 1/(z1*z2*z3)
+ sike_fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2)
+ sike_fp2mul_mont(t2, z2, t3); // t3 = 1/z1
+ sike_fp2mul_mont(t2, z1, z2); // z2 = 1/z2
+ sike_fp2mul_mont(t0, t1, z3); // z3 = 1/z3
+ sike_fp2copy(t3, z1); // z1 = 1/z1
+}
+
+
+void sike_get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
+{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+ // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R.
+ // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
+ f2elm_t t0, t1, one = F2ELM_INIT;
+
+ extern const struct params_t sike_params;
+ sike_fpcopy(sike_params.mont_one, one->c0);
+ sike_fp2add(xP, xQ, t1); // t1 = xP+xQ
+ sike_fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ
+ sike_fp2mul_mont(xR, t1, A); // A = xR*t1
+ sike_fp2add(t0, A, A); // A = A+t0
+ sike_fp2mul_mont(t0, xR, t0); // t0 = t0*xR
+ sike_fp2sub(A, one, A); // A = A-1
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2add(t1, xR, t1); // t1 = t1+xR
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2sqr_mont(A, A); // A = A^2
+ sike_fp2inv_mont(t0); // t0 = 1/t0
+ sike_fp2mul_mont(A, t0, A); // A = A*t0
+ sike_fp2sub(A, t1, A); // Afinal = A-t1
+}
+
+
+void sike_j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
+{ // Computes the j-invariant of a Montgomery curve with projective constant.
+ // Input: A,C in GF(p^2).
+ // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
+ f2elm_t t0, t1;
+
+ sike_fp2sqr_mont(A, jinv); // jinv = A^2
+ sike_fp2sqr_mont(C, t1); // t1 = C^2
+ sike_fp2add(t1, t1, t0); // t0 = t1+t1
+ sike_fp2sub(jinv, t0, t0); // t0 = jinv-t0
+ sike_fp2sub(t0, t1, t0); // t0 = t0-t1
+ sike_fp2sub(t0, t1, jinv); // jinv = t0-t1
+ sike_fp2sqr_mont(t1, t1); // t1 = t1^2
+ sike_fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2sqr_mont(t0, t1); // t1 = t0^2
+ sike_fp2mul_mont(t0, t1, t0); // t0 = t0*t1
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2add(t0, t0, t0); // t0 = t0+t0
+ sike_fp2inv_mont(jinv); // jinv = 1/jinv
+ sike_fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv
+}
+
+
+void sike_xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
+{ // Simultaneous doubling and differential addition.
+ // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
+ // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+ f2elm_t t0, t1, t2;
+
+ sike_fp2add(P->X, P->Z, t0); // t0 = XP+ZP
+ sike_fp2sub(P->X, P->Z, t1); // t1 = XP-ZP
+ sike_fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2
+ sike_fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ
+ sike_fp2correction(t2); // modular correction of both coordinates of t2
+ sike_fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ
+ sike_fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ)
+ sike_fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2
+ sike_fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ)
+ sike_fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2
+ sike_fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
+ sike_fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
+ sike_fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+ sike_fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
+ sike_fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+ sike_fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+ sike_fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+ sike_fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+ sike_fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
diff --git a/src/third_party/sike/isogeny.h b/src/third_party/sike/isogeny.h
new file mode 100644
index 00000000..18337dd2
--- /dev/null
+++ b/src/third_party/sike/isogeny.h
@@ -0,0 +1,49 @@
+#ifndef ISOGENY_H_
+#define ISOGENY_H_
+
+// Computes [2^e](X:Z) on Montgomery curve with projective
+// constant via e repeated doublings.
+void sike_xDBLe(
+ const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
+ const f2elm_t C24, size_t e);
+// Simultaneous doubling and differential addition.
+void sike_xDBLADD(
+ point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
+ const f2elm_t A24);
+// Tripling of a Montgomery point in projective coordinates (X:Z).
+void sike_xTPL(
+ const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+ const f2elm_t A24plus);
+// Computes [3^e](X:Z) on Montgomery curve with projective constant
+// via e repeated triplings.
+void sike_xTPLe(
+ const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+ const f2elm_t A24plus, size_t e);
+// Given the x-coordinates of P, Q, and R, returns the value A
+// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+void sike_get_A(
+ const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void sike_j_inv(
+ const f2elm_t A, const f2elm_t C, f2elm_t jinv);
+// Computes the corresponding 4-isogeny of a projective Montgomery
+// point (X4:Z4) of order 4.
+void sike_get_4_isog(
+ const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
+// Computes the corresponding 3-isogeny of a projective Montgomery
+// point (X3:Z3) of order 3.
+void sike_get_3_isog(
+ const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
+ f2elm_t* coeff);
+// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
+// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
+void sike_eval_3_isog(
+ point_proj_t Q, f2elm_t* coeff);
+// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
+void sike_eval_4_isog(
+ point_proj_t P, f2elm_t* coeff);
+// 3-way simultaneous inversion
+void sike_inv_3_way(
+ f2elm_t z1, f2elm_t z2, f2elm_t z3);
+
+#endif // ISOGENY_H_
diff --git a/src/third_party/sike/sike.c b/src/third_party/sike/sike.c
new file mode 100644
index 00000000..87b74174
--- /dev/null
+++ b/src/third_party/sike/sike.c
@@ -0,0 +1,531 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
+*********************************************************************************************/
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <openssl/bn.h>
+#include <openssl/base.h>
+#include <openssl/rand.h>
+#include <openssl/mem.h>
+#include <openssl/sha.h>
+
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+extern const struct params_t sike_params;
+
+// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
+#define SIDH_JINV_BYTESZ 110U
+// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
+#define SIDH_PRV_A_BITSZ 216U
+// SIDH_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny)
+#define SIDH_PRV_B_BITSZ 217U
+// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
+#define MAX_INT_POINTS_ALICE 7U
+// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation
+#define MAX_INT_POINTS_BOB 8U
+
+// Swap points.
+// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+#if !defined(OPENSSL_X86_64) || defined(OPENSSL_NO_ASM)
+static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+ crypto_word_t temp;
+ for (size_t i = 0; i < NWORDS_FIELD; i++) {
+ temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
+ P->X->c0[i] = temp ^ P->X->c0[i];
+ Q->X->c0[i] = temp ^ Q->X->c0[i];
+ temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
+ P->Z->c0[i] = temp ^ P->Z->c0[i];
+ Q->Z->c0[i] = temp ^ Q->Z->c0[i];
+ temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
+ P->X->c1[i] = temp ^ P->X->c1[i];
+ Q->X->c1[i] = temp ^ Q->X->c1[i];
+ temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
+ P->Z->c1[i] = temp ^ P->Z->c1[i];
+ Q->Z->c1[i] = temp ^ Q->Z->c1[i];
+ }
+}
+#endif
+
+// Swap points.
+// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
+ sike_cswap_asm(P, Q, option);
+#else
+ sike_cswap(P, Q, option);
+#endif
+}
+
+static void ladder3Pt(
+ const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
+ int is_A, point_proj_t R, const f2elm_t A) {
+ point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
+ f2elm_t A24 = F2ELM_INIT;
+ crypto_word_t mask;
+ int bit, swap, prevbit = 0;
+
+ const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;
+
+ // Initializing constant
+ sike_fpcopy(sike_params.mont_one, A24[0].c0);
+ sike_fp2add(A24, A24, A24);
+ sike_fp2add(A, A24, A24);
+ sike_fp2div2(A24, A24);
+ sike_fp2div2(A24, A24); // A24 = (A+2)/4
+
+ // Initializing points
+ sike_fp2copy(xQ, R0->X);
+ sike_fpcopy(sike_params.mont_one, R0->Z[0].c0);
+ sike_fp2copy(xPQ, R2->X);
+ sike_fpcopy(sike_params.mont_one, R2->Z[0].c0);
+ sike_fp2copy(xP, R->X);
+ sike_fpcopy(sike_params.mont_one, R->Z[0].c0);
+ memset(R->Z->c1, 0, sizeof(R->Z->c1));
+
+ // Main loop
+ for (size_t i = 0; i < nbits; i++) {
+ bit = (m[i >> 3] >> (i & 7)) & 1;
+ swap = bit ^ prevbit;
+ prevbit = bit;
+ mask = 0 - (crypto_word_t)swap;
+
+ sike_fp2cswap(R, R2, mask);
+ sike_xDBLADD(R0, R2, R->X, A24);
+ sike_fp2mul_mont(R2->X, R->Z, R2->X);
+ }
+
+ mask = 0 - (crypto_word_t)prevbit;
+ sike_fp2cswap(R, R2, mask);
+}
+
+// Initialization of basis points
+static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
+ sike_fpcopy(gen, XP->c0);
+ sike_fpcopy(gen + NWORDS_FIELD, XP->c1);
+ sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
+ sike_fpcopy(gen + 3*NWORDS_FIELD, XQ->c1);
+ sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c0);
+ sike_fpcopy(gen + 5*NWORDS_FIELD, XR->c1);
+}
+
+// Conversion of GF(p^2) element from Montgomery to standard representation.
+static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
+ f2elm_t t;
+ sike_from_fp2mont(x, t);
+
+ // convert to bytes in little endian form
+ for (size_t i=0; i<FIELD_BYTESZ; i++) {
+ enc[i+ 0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
+ enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
+ }
+}
+
+// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
+// Each of the two GF(p) coefficients is encoded in FIELD_BYTESZ octets in little endian format
+// (i.e., the least significant octet is located in the lowest memory address).
+static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
+ memset(t[0].c0, 0, sizeof(t[0].c0));
+ memset(t[0].c1, 0, sizeof(t[0].c1));
+ // convert bytes in little endian form to f2elm_t
+ for (size_t i = 0; i < FIELD_BYTESZ; i++) {
+ t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+ 0]) << (8*(i%LSZ));
+ t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (8*(i%LSZ));
+ }
+ sike_to_fp2mont(t, t);
+}
+
+// Alice's ephemeral public key generation
+// Input: a private key skA, of which the low SIDH_PRV_A_BITSZ bits are used.
+// Output: the public key pkA consisting of 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ bytes.
+static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
+{
+ point_proj_t R, pts[MAX_INT_POINTS_ALICE];
+ point_proj_t phiP = POINT_PROJ_INIT;
+ point_proj_t phiQ = POINT_PROJ_INIT;
+ point_proj_t phiR = POINT_PROJ_INIT;
+ f2elm_t XPA, XQA, XRA, coeff[3];
+ f2elm_t A24plus = F2ELM_INIT;
+ f2elm_t C24 = F2ELM_INIT;
+ f2elm_t A = F2ELM_INIT;
+ unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+ // Initialize basis points
+ sike_init_basis(sike_params.A_gen, XPA, XQA, XRA);
+ sike_init_basis(sike_params.B_gen, phiP->X, phiQ->X, phiR->X);
+ sike_fpcopy(sike_params.mont_one, (phiP->Z)->c0);
+ sike_fpcopy(sike_params.mont_one, (phiQ->Z)->c0);
+ sike_fpcopy(sike_params.mont_one, (phiR->Z)->c0);
+
+ // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
+ sike_fpcopy(sike_params.mont_one, A24plus->c0);
+ sike_fp2add(A24plus, A24plus, A24plus);
+ sike_fp2add(A24plus, A24plus, C24);
+ sike_fp2add(A24plus, C24, A);
+ sike_fp2add(C24, C24, A24plus);
+
+ // Retrieve kernel point
+ ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);
+
+ // Traverse tree
+ index = 0;
+ for (size_t row = 1; row < A_max; row++) {
+ while (index < A_max-row) {
+ sike_fp2copy(R->X, pts[npts]->X);
+ sike_fp2copy(R->Z, pts[npts]->Z);
+ pts_index[npts++] = index;
+ m = sike_params.A_strat[ii++];
+ sike_xDBLe(R, R, A24plus, C24, (2*m));
+ index += m;
+ }
+ sike_get_4_isog(R, A24plus, C24, coeff);
+
+ for (size_t i = 0; i < npts; i++) {
+ sike_eval_4_isog(pts[i], coeff);
+ }
+ sike_eval_4_isog(phiP, coeff);
+ sike_eval_4_isog(phiQ, coeff);
+ sike_eval_4_isog(phiR, coeff);
+
+ sike_fp2copy(pts[npts-1]->X, R->X);
+ sike_fp2copy(pts[npts-1]->Z, R->Z);
+ index = pts_index[npts-1];
+ npts -= 1;
+ }
+
+ sike_get_4_isog(R, A24plus, C24, coeff);
+ sike_eval_4_isog(phiP, coeff);
+ sike_eval_4_isog(phiQ, coeff);
+ sike_eval_4_isog(phiR, coeff);
+
+ sike_inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
+ sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+ sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+ sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+ // Format public key
+ sike_fp2_encode(phiP->X, pkA);
+ sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
+ sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
+}
+
+// Bob's ephemeral public key generation
+// It computes the public key pkB from the caller-supplied private key skB.
+// Input: a private key skB, of which the low SIDH_PRV_B_BITSZ bits are used.
+// Output: the public key pkB consisting of 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ bytes.
+static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
+{
+ point_proj_t R, pts[MAX_INT_POINTS_BOB];
+ point_proj_t phiP = POINT_PROJ_INIT;
+ point_proj_t phiQ = POINT_PROJ_INIT;
+ point_proj_t phiR = POINT_PROJ_INIT;
+ f2elm_t XPB, XQB, XRB, coeff[3];
+ f2elm_t A24plus = F2ELM_INIT;
+ f2elm_t A24minus = F2ELM_INIT;
+ f2elm_t A = F2ELM_INIT;
+ unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+ // Initialize basis points
+ sike_init_basis(sike_params.B_gen, XPB, XQB, XRB);
+ sike_init_basis(sike_params.A_gen, phiP->X, phiQ->X, phiR->X);
+ sike_fpcopy(sike_params.mont_one, (phiP->Z)->c0);
+ sike_fpcopy(sike_params.mont_one, (phiQ->Z)->c0);
+ sike_fpcopy(sike_params.mont_one, (phiR->Z)->c0);
+
+ // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
+ sike_fpcopy(sike_params.mont_one, A24plus->c0);
+ sike_fp2add(A24plus, A24plus, A24plus);
+ sike_fp2add(A24plus, A24plus, A24minus);
+ sike_fp2add(A24plus, A24minus, A);
+ sike_fp2add(A24minus, A24minus, A24plus);
+
+ // Retrieve kernel point
+ ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);
+
+ // Traverse tree
+ index = 0;
+ for (size_t row = 1; row < B_max; row++) {
+ while (index < B_max-row) {
+ sike_fp2copy(R->X, pts[npts]->X);
+ sike_fp2copy(R->Z, pts[npts]->Z);
+ pts_index[npts++] = index;
+ m = sike_params.B_strat[ii++];
+ sike_xTPLe(R, R, A24minus, A24plus, m);
+ index += m;
+ }
+ sike_get_3_isog(R, A24minus, A24plus, coeff);
+
+ for (size_t i = 0; i < npts; i++) {
+ sike_eval_3_isog(pts[i], coeff);
+ }
+ sike_eval_3_isog(phiP, coeff);
+ sike_eval_3_isog(phiQ, coeff);
+ sike_eval_3_isog(phiR, coeff);
+
+ sike_fp2copy(pts[npts-1]->X, R->X);
+ sike_fp2copy(pts[npts-1]->Z, R->Z);
+ index = pts_index[npts-1];
+ npts -= 1;
+ }
+
+ sike_get_3_isog(R, A24minus, A24plus, coeff);
+ sike_eval_3_isog(phiP, coeff);
+ sike_eval_3_isog(phiQ, coeff);
+ sike_eval_3_isog(phiR, coeff);
+
+ sike_inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
+ sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+ sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+ sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+ // Format public key
+ sike_fp2_encode(phiP->X, pkB);
+ sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
+ sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
+}
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
+// Inputs: Alice's skA, of which the low SIDH_PRV_A_BITSZ bits are used.
+// Bob's pkB consists of 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ bytes.
+// Output: a shared secret ssA that consists of one element in GF(p^2) (the j-invariant) encoded in SIDH_JINV_BYTESZ bytes.
+static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
+{
+ point_proj_t R, pts[MAX_INT_POINTS_ALICE];
+ f2elm_t coeff[3], PKB[3], jinv;
+ f2elm_t A24plus = F2ELM_INIT;
+ f2elm_t C24 = F2ELM_INIT;
+ f2elm_t A = F2ELM_INIT;
+ unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+ // Initialize images of Bob's basis
+ fp2_decode(pkB, PKB[0]);
+ fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
+ fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+ // Initialize constants
+ sike_get_A(PKB[0], PKB[1], PKB[2], A);
+ sike_fpadd(sike_params.mont_one, sike_params.mont_one, C24->c0);
+ sike_fp2add(A, C24, A24plus);
+ sike_fpadd(C24->c0, C24->c0, C24->c0);
+
+ // Retrieve kernel point
+ ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);
+
+ // Traverse tree
+ index = 0;
+ for (size_t row = 1; row < A_max; row++) {
+ while (index < A_max-row) {
+ sike_fp2copy(R->X, pts[npts]->X);
+ sike_fp2copy(R->Z, pts[npts]->Z);
+ pts_index[npts++] = index;
+ m = sike_params.A_strat[ii++];
+ sike_xDBLe(R, R, A24plus, C24, (2*m));
+ index += m;
+ }
+ sike_get_4_isog(R, A24plus, C24, coeff);
+
+ for (size_t i = 0; i < npts; i++) {
+ sike_eval_4_isog(pts[i], coeff);
+ }
+
+ sike_fp2copy(pts[npts-1]->X, R->X);
+ sike_fp2copy(pts[npts-1]->Z, R->Z);
+ index = pts_index[npts-1];
+ npts -= 1;
+ }
+
+ sike_get_4_isog(R, A24plus, C24, coeff);
+ sike_fp2add(A24plus, A24plus, A24plus);
+ sike_fp2sub(A24plus, C24, A24plus);
+ sike_fp2add(A24plus, A24plus, A24plus);
+ sike_j_inv(A24plus, C24, jinv);
+ sike_fp2_encode(jinv, ssA);
+}
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
+// Inputs: Bob's skB, of which the low SIDH_PRV_B_BITSZ bits are used.
+// Alice's pkA consists of 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ bytes.
+// Output: a shared secret ssB that consists of one element in GF(p^2) (the j-invariant) encoded in SIDH_JINV_BYTESZ bytes.
+static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
+{
+ point_proj_t R, pts[MAX_INT_POINTS_BOB];
+ f2elm_t coeff[3], PKB[3], jinv;
+ f2elm_t A24plus = F2ELM_INIT;
+ f2elm_t A24minus = F2ELM_INIT;
+ f2elm_t A = F2ELM_INIT;
+ unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+ // Initialize images of Alice's basis
+ fp2_decode(pkA, PKB[0]);
+ fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
+ fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+ // Initialize constants
+ sike_get_A(PKB[0], PKB[1], PKB[2], A);
+ sike_fpadd(sike_params.mont_one, sike_params.mont_one, A24minus->c0);
+ sike_fp2add(A, A24minus, A24plus);
+ sike_fp2sub(A, A24minus, A24minus);
+
+ // Retrieve kernel point
+ ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);
+
+ // Traverse tree
+ index = 0;
+ for (size_t row = 1; row < B_max; row++) {
+ while (index < B_max-row) {
+ sike_fp2copy(R->X, pts[npts]->X);
+ sike_fp2copy(R->Z, pts[npts]->Z);
+ pts_index[npts++] = index;
+ m = sike_params.B_strat[ii++];
+ sike_xTPLe(R, R, A24minus, A24plus, m);
+ index += m;
+ }
+ sike_get_3_isog(R, A24minus, A24plus, coeff);
+
+ for (size_t i = 0; i < npts; i++) {
+ sike_eval_3_isog(pts[i], coeff);
+ }
+
+ sike_fp2copy(pts[npts-1]->X, R->X);
+ sike_fp2copy(pts[npts-1]->Z, R->Z);
+ index = pts_index[npts-1];
+ npts -= 1;
+ }
+
+ sike_get_3_isog(R, A24minus, A24plus, coeff);
+ sike_fp2add(A24plus, A24minus, A);
+ sike_fp2add(A, A, A);
+ sike_fp2sub(A24plus, A24minus, A24plus);
+ sike_j_inv(A, A24plus, jinv);
+ sike_fp2_encode(jinv, ssB);
+}
+
+int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
+ uint8_t out_pub[SIKE_PUB_BYTESZ]) {
+ int ret = 0;
+
+ // Calculate private key for Bob: a random SIDH_PRV_B_BITSZ-bit value with the
+ // top bit set (BN_RAND_TOP_ONE), serialized little-endian into out_priv.
+ BIGNUM *bn_sidh_prv = BN_new();
+ if (!bn_sidh_prv ||
+ !BN_rand(bn_sidh_prv, SIDH_PRV_B_BITSZ, BN_RAND_TOP_ONE,
+ BN_RAND_BOTTOM_ANY) ||
+ !BN_bn2le_padded(out_priv, BITS_TO_BYTES(SIDH_PRV_B_BITSZ),
+ bn_sidh_prv)) {
+ goto end;
+ }
+
+ gen_iso_B(out_priv, out_pub);
+ ret = 1;
+
+end:
+ BN_free(bn_sidh_prv);
+ return ret;
+}
+
+void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+ uint8_t out_ciphertext[SIKE_CT_BYTESZ],
+ const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
+ // Secret buffer is reused by the function to store some ephemeral
+ // secret data. Its size must be the maximum of SHA256_CBLOCK,
+ // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
+ uint8_t secret[SHA256_CBLOCK];
+ uint8_t j[SIDH_JINV_BYTESZ];
+ uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ];
+ SHA256_CTX ctx;
+
+ // Generate secret key for A from a random message m = {0,1}^n stored in temp:
+ // secret key A = low SIDH_PRV_A_BITSZ bits of SHA256({0,1}^n || pub_key)
+ RAND_bytes(temp, SIKE_MSG_BYTESZ);
+
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ);
+ SHA256_Update(&ctx, pub_key, SIKE_PUB_BYTESZ);
+ SHA256_Final(secret, &ctx);
+
+ // Generate public key for A - first part of the ciphertext
+ gen_iso_A(secret, out_ciphertext);
+
+ // Generate c1:
+ // h = SHA256(j-invariant)
+ // c1 = h ^ m
+ ex_iso_A(secret, pub_key, j);
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, j, sizeof(j));
+ SHA256_Final(secret, &ctx);
+
+ // c1 = h ^ m
+ uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ];
+ for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+ c1[i] = temp[i] ^ secret[i];
+ }
+
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ);
+ SHA256_Update(&ctx, out_ciphertext, SIKE_CT_BYTESZ);
+ SHA256_Final(secret, &ctx);
+ // Generate shared secret out_shared_key = SHA256(m||out_ciphertext)
+ memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
+
+void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+ const uint8_t ciphertext[SIKE_CT_BYTESZ],
+ const uint8_t pub_key[SIKE_PUB_BYTESZ],
+ const uint8_t priv_key[SIKE_PRV_BYTESZ]) {
+ // Secret buffer is reused by the function to store some ephemeral
+ // secret data. Its size must be the maximum of SHA256_CBLOCK,
+ // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
+ uint8_t secret[SHA256_CBLOCK];
+ uint8_t j[SIDH_JINV_BYTESZ];
+ uint8_t c0[SIKE_PUB_BYTESZ];
+ uint8_t temp[SIKE_MSG_BYTESZ];
+ uint8_t shared_nok[SIKE_MSG_BYTESZ];
+ SHA256_CTX ctx;
+
+ // Random substitute for m, selected below when the c0 check fails. This is OK as we are only using ephemeral keys in BoringSSL
+ RAND_bytes(shared_nok, SIKE_MSG_BYTESZ);
+
+ // Recover m
+ // Let ciphertext = c0 || c1 - both have fixed sizes
+ // m = F(j-invariant(c0, priv_key)) ^ c1
+ ex_iso_B(priv_key, ciphertext, j);
+
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, j, sizeof(j));
+ SHA256_Final(secret, &ctx);
+
+ const uint8_t *c1 = &ciphertext[sizeof(c0)];
+ for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+ temp[i] = c1[i] ^ secret[i];
+ }
+
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ);
+ SHA256_Update(&ctx, pub_key, SIKE_PUB_BYTESZ);
+ SHA256_Final(secret, &ctx);
+
+ // Recover c0 = public key A, then compare it with the received c0 and pick m or shared_nok via constant-time helpers
+ gen_iso_A(secret, c0);
+ crypto_word_t ok = constant_time_is_zero_w(
+ CRYPTO_memcmp(c0, ciphertext, SIKE_PUB_BYTESZ));
+ for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+ temp[i] = constant_time_select_8(ok, temp[i], shared_nok[i]);
+ }
+
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, temp, SIKE_MSG_BYTESZ);
+ SHA256_Update(&ctx, ciphertext, SIKE_CT_BYTESZ);
+ SHA256_Final(secret, &ctx);
+
+ // Generate shared secret out_shared_key = SHA256(m||ciphertext)
+ memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
diff --git a/src/third_party/sike/sike.h b/src/third_party/sike/sike.h
new file mode 100644
index 00000000..5819ebf4
--- /dev/null
+++ b/src/third_party/sike/sike.h
@@ -0,0 +1,64 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: API header file for SIKE
+*********************************************************************************************/
+
+#ifndef SIKE_H_
+#define SIKE_H_
+
+#include <stdint.h>
+#include <openssl/base.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* SIKE
+ *
+ * SIKE is an isogeny-based post-quantum key encapsulation mechanism. Description of the
+ * algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code
+ * is based on "Additional_Implementations" from PQC NIST submission package which can
+ * be found here:
+ * https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip
+ *
+ * [SIKE] https://sike.org/files/SIDH-spec.pdf
+ */
+
+// SIKE_PUB_BYTESZ is the number of bytes in a public key.
+#define SIKE_PUB_BYTESZ 330
+// SIKE_PRV_BYTESZ is the number of bytes in a private key.
+#define SIKE_PRV_BYTESZ 28
+// SIKE_SS_BYTESZ is the number of bytes in a shared key.
+#define SIKE_SS_BYTESZ 16
+// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated
+// with the public key (see 1.4 of SIKE).
+#define SIKE_MSG_BYTESZ 16
+// SIKE_CT_BYTESZ is the number of bytes in a ciphertext.
+#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ)
+
+// SIKE_keypair outputs a public and secret key. Internally it uses BN_rand() as
+// an entropy source. In case of success function returns 1, otherwise 0.
+OPENSSL_EXPORT int SIKE_keypair(
+ uint8_t out_priv[SIKE_PRV_BYTESZ],
+ uint8_t out_pub[SIKE_PUB_BYTESZ]);
+
+// SIKE_encaps generates and encrypts a random session key, writing those values to
+// |out_shared_key| and |out_ciphertext|, respectively.
+OPENSSL_EXPORT void SIKE_encaps(
+ uint8_t out_shared_key[SIKE_SS_BYTESZ],
+ uint8_t out_ciphertext[SIKE_CT_BYTESZ],
+ const uint8_t pub_key[SIKE_PUB_BYTESZ]);
+
+// SIKE_decaps recovers the session key encapsulated in |ciphertext|, writing it to |out_shared_key|.
+OPENSSL_EXPORT void SIKE_decaps(
+ uint8_t out_shared_key[SIKE_SS_BYTESZ],
+ const uint8_t ciphertext[SIKE_CT_BYTESZ],
+ const uint8_t pub_key[SIKE_PUB_BYTESZ],
+ const uint8_t priv_key[SIKE_PRV_BYTESZ]);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/third_party/sike/sike_test.cc b/src/third_party/sike/sike_test.cc
new file mode 100644
index 00000000..2180a528
--- /dev/null
+++ b/src/third_party/sike/sike_test.cc
@@ -0,0 +1,251 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <gtest/gtest.h>
+#include <stdint.h>
+
+#include "sike.h"
+#include "fpx.h"
+#include "../../crypto/test/abi_test.h"
+
+TEST(SIKE, RoundTrip) {
+ uint8_t sk[SIKE_PRV_BYTESZ] = {0};
+ uint8_t pk[SIKE_PUB_BYTESZ] = {0};
+ uint8_t ct[SIKE_CT_BYTESZ] = {0};
+ uint8_t ss_enc[SIKE_SS_BYTESZ] = {0};
+ uint8_t ss_dec[SIKE_SS_BYTESZ] = {0};
+
+ for (size_t i = 0; i < 30; i++) {
+ EXPECT_EQ(SIKE_keypair(sk, pk), 1);
+ SIKE_encaps(ss_enc, ct, pk);
+ SIKE_decaps(ss_dec, ct, pk, sk);
+
+ EXPECT_EQ(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0);
+ }
+}
+
+TEST(SIKE, Decapsulation) {
+ const uint8_t sk[SIKE_PRV_BYTESZ] = {
+ 0xB1, 0xFD, 0x34, 0x42, 0xDB, 0x02, 0xBC, 0x9D, 0x4C, 0xD0,
+ 0x72, 0x34, 0x4D, 0xBD, 0x06, 0xDF, 0x1C, 0x7D, 0x0A, 0x88,
+ 0xB2, 0x50, 0xC4, 0xF6, 0xAE, 0xE8, 0x25, 0x01};
+
+ const uint8_t pk[SIKE_PUB_BYTESZ] = {
+ 0x6D, 0x8D, 0xF5, 0x7B, 0xCD, 0x47, 0xCA, 0xCB, 0x7A, 0x38, 0xB7, 0xA6,
+ 0x90, 0xB7, 0x37, 0x03, 0xD4, 0x6F, 0x27, 0x73, 0x74, 0x17, 0x5A, 0xA4,
+ 0x0D, 0xC6, 0x81, 0xAD, 0xDB, 0xF7, 0x18, 0xB2, 0x3C, 0x30, 0xCF, 0xAA,
+ 0x08, 0x11, 0x91, 0xCC, 0x27, 0x4E, 0xF1, 0xA6, 0xB7, 0xDA, 0xD2, 0xCF,
+ 0x99, 0x7F, 0xF7, 0xE1, 0xD0, 0xCE, 0x00, 0xD2, 0x4B, 0xA4, 0x33, 0xB4,
+ 0x87, 0x01, 0x3F, 0x02, 0xF7, 0xF9, 0xDE, 0xC3, 0x60, 0x62, 0xDA, 0x3F,
+ 0x74, 0xA9, 0x44, 0xBE, 0x19, 0xD5, 0x03, 0x2A, 0x79, 0x8C, 0xA7, 0xFF,
+ 0xEA, 0xB3, 0xBB, 0xB5, 0xD4, 0x1D, 0x8F, 0x92, 0xCE, 0x62, 0x6E, 0x99,
+ 0x24, 0xD7, 0x57, 0xFA, 0xCD, 0xB6, 0xE2, 0x8E, 0xFD, 0x22, 0x0E, 0x31,
+ 0x21, 0x01, 0x8D, 0x79, 0xF8, 0x3E, 0x27, 0xEC, 0x43, 0x40, 0xDB, 0x82,
+ 0xE5, 0xEB, 0x6C, 0x97, 0x66, 0x29, 0x15, 0x68, 0xB7, 0x4D, 0x84, 0xD1,
+ 0x8A, 0x0B, 0x12, 0x36, 0x2C, 0x0C, 0x0A, 0x6E, 0x4E, 0xDE, 0xA5, 0x8A,
+ 0xDE, 0x77, 0xDD, 0x70, 0x49, 0x73, 0xAC, 0x27, 0x6D, 0x8D, 0x25, 0x9A,
+ 0xE4, 0x25, 0xE8, 0x95, 0x8F, 0xFE, 0x90, 0x3B, 0x00, 0x69, 0x20, 0xE8,
+ 0x7C, 0xA5, 0xF5, 0x79, 0xC0, 0x61, 0x51, 0x91, 0x35, 0x25, 0x3F, 0x17,
+ 0x2F, 0x70, 0x73, 0xF0, 0x89, 0xB5, 0xC8, 0x25, 0xB8, 0xE5, 0x7E, 0x34,
+ 0xDD, 0x11, 0xE5, 0xD6, 0xC3, 0xD5, 0x29, 0x89, 0xC6, 0x2C, 0x99, 0x53,
+ 0x1D, 0x2C, 0x77, 0xB0, 0xB6, 0xA1, 0xBD, 0x79, 0xFB, 0x4A, 0xC2, 0x48,
+ 0x4C, 0x62, 0x51, 0x00, 0xE3, 0x91, 0x2A, 0xCB, 0x84, 0x03, 0x5D, 0x2D,
+ 0xC8, 0x33, 0xE9, 0x14, 0xBF, 0x74, 0x21, 0xBC, 0xF4, 0x76, 0xE5, 0x42,
+ 0xB8, 0xBD, 0xE2, 0xE7, 0x20, 0x95, 0x54, 0xF2, 0xED, 0xC0, 0x79, 0x38,
+ 0x1E, 0xD2, 0xEA, 0x1A, 0x63, 0x85, 0xE7, 0x3A, 0xDA, 0xAD, 0xAB, 0x1B,
+ 0x1E, 0x19, 0x9E, 0x73, 0xD0, 0x10, 0x2E, 0x38, 0xAC, 0x8B, 0x00, 0x6A,
+ 0x30, 0x2C, 0x3D, 0x70, 0x8E, 0x39, 0x6D, 0xC0, 0x12, 0x61, 0x7D, 0x2A,
+ 0x0A, 0x04, 0x95, 0x8E, 0x09, 0x3C, 0x7B, 0xEC, 0x2E, 0xBC, 0xE8, 0xE8,
+ 0xE8, 0x37, 0x29, 0xC4, 0x7E, 0x76, 0x48, 0xB9, 0x3B, 0x72, 0xE5, 0x99,
+ 0x9B, 0xF9, 0xE3, 0x99, 0x72, 0x3F, 0x35, 0x29, 0x85, 0xE0, 0xC8, 0xBF,
+ 0xB1, 0x6B, 0xB1, 0x6E, 0x72, 0x00};
+
+ const uint8_t ct[SIKE_CT_BYTESZ] = {
+ 0xFF, 0xEB, 0xEF, 0x4A, 0xC0, 0x57, 0x0F, 0x26, 0xAC, 0x76, 0xA8, 0xB0,
+ 0xA3, 0x5D, 0x9C, 0xD9, 0x25, 0xD1, 0x7F, 0x92, 0x5D, 0xF4, 0x23, 0x34,
+ 0xC3, 0x03, 0x10, 0xE1, 0xB0, 0x24, 0x9B, 0x44, 0x58, 0x26, 0x13, 0x56,
+ 0x83, 0x43, 0x72, 0x69, 0x28, 0x0D, 0x55, 0x07, 0x1F, 0xDB, 0xC0, 0x23,
+ 0x34, 0x83, 0x1A, 0x09, 0x9B, 0x80, 0x00, 0x64, 0x56, 0xDC, 0x79, 0x7A,
+ 0xD2, 0xCE, 0x23, 0xC9, 0x72, 0x27, 0xFC, 0x8D, 0xAB, 0xBF, 0xD3, 0x17,
+ 0xF6, 0x91, 0x7B, 0x15, 0x93, 0x83, 0x8A, 0x4F, 0x6C, 0xCA, 0x4A, 0x94,
+ 0xDA, 0xC7, 0x9D, 0xB6, 0xD6, 0xBA, 0xBD, 0x81, 0x9A, 0x78, 0xE5, 0xE5,
+ 0xBE, 0x17, 0xBC, 0xCB, 0xC8, 0x23, 0x80, 0x5F, 0x75, 0xF8, 0xDB, 0x51,
+ 0x55, 0x00, 0x25, 0x33, 0x52, 0x64, 0xB2, 0xD6, 0xD8, 0x9A, 0x2A, 0x9E,
+ 0x29, 0x99, 0x13, 0x33, 0xE2, 0xA7, 0x98, 0xAC, 0xD7, 0x79, 0x5C, 0x2F,
+ 0xBA, 0x07, 0xC3, 0x03, 0x37, 0xD6, 0xE6, 0xB5, 0xA1, 0xF5, 0x29, 0xB6,
+ 0xF6, 0xC0, 0x5C, 0x44, 0x68, 0x2B, 0x0B, 0xF5, 0x00, 0x01, 0x44, 0xD5,
+ 0xCC, 0x23, 0xB5, 0x27, 0x4F, 0xCA, 0xB4, 0x05, 0x01, 0xF9, 0xD4, 0x41,
+ 0xE0, 0xE1, 0x1E, 0xCF, 0xA9, 0xBC, 0x79, 0xD7, 0xD5, 0xF5, 0x3C, 0xE6,
+ 0x93, 0xF4, 0x6C, 0x84, 0x5A, 0x2C, 0x4B, 0xE4, 0x91, 0xB2, 0xB2, 0xB8,
+ 0xAD, 0x74, 0x9A, 0x69, 0x79, 0x4C, 0x84, 0xB7, 0xBF, 0xF1, 0x68, 0x4B,
+ 0xAE, 0x0F, 0x7F, 0x45, 0x3B, 0x18, 0x3F, 0xFA, 0x00, 0x48, 0xE0, 0x3A,
+ 0xE2, 0xC0, 0xAE, 0x00, 0xCE, 0x90, 0x28, 0xA4, 0x1B, 0xBE, 0xCA, 0x0C,
+ 0x21, 0x29, 0x64, 0x30, 0x5E, 0x35, 0xAD, 0xFD, 0x83, 0x47, 0x40, 0x6D,
+ 0x15, 0x56, 0xFC, 0xF8, 0x5F, 0xAB, 0x81, 0xFE, 0x6B, 0xE9, 0x6B, 0xED,
+ 0x27, 0x35, 0x7C, 0xD8, 0x2C, 0xD4, 0xF2, 0x11, 0xE6, 0xAF, 0xDF, 0xB8,
+ 0x91, 0x96, 0xEB, 0xF7, 0x4C, 0x8D, 0x70, 0x77, 0x90, 0x81, 0x00, 0x09,
+ 0x19, 0x27, 0x8A, 0x9E, 0xB6, 0x1A, 0xE9, 0xAC, 0x6C, 0xC9, 0xF8, 0xEA,
+ 0xA2, 0x34, 0xB8, 0xAC, 0xB3, 0xB3, 0x68, 0xA1, 0xB7, 0x29, 0x55, 0xCA,
+ 0x40, 0x23, 0x92, 0x5C, 0x0C, 0x79, 0x6B, 0xD6, 0x9F, 0x5B, 0xD2, 0xE6,
+ 0xAE, 0x04, 0xCB, 0xEC, 0xC7, 0x88, 0x18, 0xDB, 0x7A, 0xE6, 0xD6, 0xC9,
+ 0x39, 0xFD, 0x93, 0x9B, 0xC8, 0x01, 0x6F, 0x3E, 0x6C, 0x90, 0x3E, 0x73,
+ 0x76, 0x99, 0x7C, 0x48, 0xDA, 0x68, 0x48, 0x80, 0x2B, 0x63};
+
+ const uint8_t ss_exp[SIKE_SS_BYTESZ] = {0xA1, 0xF9, 0x5A, 0x67, 0xB9, 0x3D,
+ 0x1E, 0x72, 0xE8, 0xC5, 0x71, 0xF1,
+ 0x4C, 0xB2, 0xAA, 0x6D};
+
+ uint8_t ss_dec[SIKE_SS_BYTESZ] = {0};
+ SIKE_decaps(ss_dec, ct, pk, sk);
+ EXPECT_EQ(memcmp(ss_dec, ss_exp, sizeof(ss_exp)), 0);
+}
+
+// SIKE_encaps and SIKE_keypair don't produce all-zero outputs.
+TEST(SIKE, NonZero) {
+ uint8_t sk[SIKE_PRV_BYTESZ] = {0};
+ uint8_t pk[SIKE_PUB_BYTESZ] = {0};
+ uint8_t ct[SIKE_CT_BYTESZ] = {0};
+ uint8_t ss[SIKE_SS_BYTESZ] = {0};
+
+ // Check secret and public key returned by SIKE_keypair
+ EXPECT_EQ(SIKE_keypair(sk, pk), 1);
+ uint8_t tmp = 0;
+ for (size_t i = 0; i < sizeof(sk); i++) {
+ tmp |= sk[i];
+ }
+ EXPECT_NE(tmp, 0);
+
+ tmp = 0;
+ for (size_t i = 0; i < sizeof(pk); i++) {
+ tmp |= pk[i];
+ }
+ EXPECT_NE(tmp, 0);
+
+ // Check shared secret and ciphertext returned by SIKE_encaps
+ SIKE_encaps(ss, ct, pk);
+ tmp = 0;
+ for (size_t i = 0; i < sizeof(ct); i++) {
+ tmp |= ct[i];
+ }
+ EXPECT_NE(tmp, 0);
+
+ tmp = 0;
+ for (size_t i = 0; i < sizeof(ss); i++) {
+ tmp |= ss[i];
+ }
+ EXPECT_NE(tmp, 0);
+}
+
+TEST(SIKE, Negative) {
+ uint8_t sk[SIKE_PRV_BYTESZ] = {0};
+ uint8_t pk[SIKE_PUB_BYTESZ] = {0};
+ uint8_t ct[SIKE_CT_BYTESZ] = {0};
+ uint8_t ss_enc[SIKE_SS_BYTESZ] = {0};
+ uint8_t ss_dec[SIKE_SS_BYTESZ] = {0};
+
+ EXPECT_EQ(SIKE_keypair(sk, pk), 1);
+ SIKE_encaps(ss_enc, ct, pk);
+
+ // Change ciphertext
+ uint8_t ct_tmp[SIKE_CT_BYTESZ] = {0};
+ memcpy(ct_tmp, ct, sizeof(ct));
+ ct_tmp[0] = ~ct_tmp[0];
+ SIKE_decaps(ss_dec, ct_tmp, pk, sk);
+ EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0);
+
+ // Change secret key
+ uint8_t sk_tmp[SIKE_PRV_BYTESZ] = {0};
+ memcpy(sk_tmp, sk, sizeof(sk));
+ sk_tmp[0] = ~sk_tmp[0];
+ SIKE_decaps(ss_dec, ct, pk, sk_tmp);
+ EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0);
+
+ // Change public key
+ uint8_t pk_tmp[SIKE_PUB_BYTESZ] = {0};
+ memcpy(pk_tmp, pk, sizeof(pk));
+ pk_tmp[0] = ~pk_tmp[0];
+ SIKE_decaps(ss_dec, ct, pk_tmp, sk);
+ EXPECT_NE(memcmp(ss_enc, ss_dec, SIKE_SS_BYTESZ), 0);
+}
+
+TEST(SIKE, Unaligned) {
+ alignas(4) uint8_t priv[SIKE_PRV_BYTESZ + 1];
+ alignas(4) uint8_t pub[SIKE_PUB_BYTESZ + 1];
+ alignas(4) uint8_t shared_key1[SIKE_SS_BYTESZ + 1];
+ alignas(4) uint8_t ciphertext[SIKE_CT_BYTESZ + 1];
+ alignas(4) uint8_t shared_key2[SIKE_SS_BYTESZ + 1];
+
+ ASSERT_TRUE(SIKE_keypair(priv + 1, pub + 1));
+ SIKE_encaps(shared_key1 + 1, ciphertext + 1, pub + 1);
+ SIKE_decaps(shared_key2 + 1, ciphertext + 1, pub + 1, priv + 1);
+
+ EXPECT_EQ(memcmp(shared_key1 + 1, shared_key2 + 1, SIKE_SS_BYTESZ), 0);
+}
+
+#if defined(SUPPORTS_ABI_TEST) && \
+ (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64))
+TEST(SIKE, ABI) {
+ felm_t a, b, c;
+ dfelm_t d, e, f;
+ CHECK_ABI(sike_fpadd, a, b, c);
+ CHECK_ABI(sike_fpsub, a, b, c);
+ CHECK_ABI(sike_mpmul, a, b, d);
+ CHECK_ABI(sike_fprdc, d, a);
+ CHECK_ABI(sike_mpadd_asm, a, b, c);
+ CHECK_ABI(sike_mpsubx2_asm, d, e, f);
+ CHECK_ABI(sike_mpdblsubx2_asm, d, e, f);
+}
+
+// Additional tests for checking if assembly implementation
+// of MUL and REDC handles carry chains correctly.
+TEST(SIKE, CarryChains) {
+ // Expected results
+ const dfelm_t exp_mul = {
+ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000,
+ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+ 0x0000000000000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ };
+
+ const felm_t exp_redc = {
+ 0x93AA0C8C2D3235BE, 0xA8CD35DDDE399B46, 0xB9BBA5469509CA65,
+ 0x6B2FB3A5A2FB86E4, 0x585591BA6DBE862C, 0xD92D3FF5FE0938F2,
+ 0x0001E1F0EE75A1E1
+ };
+
+ // Input
+ dfelm_t in14 = {
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+ };
+
+ felm_t in7 = {
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
+ 0xFFFFFFFFFFFFFFFF
+ };
+
+ dfelm_t res;
+ sike_mpmul(in7, in7, res);
+ EXPECT_EQ(memcmp(exp_mul, res, sizeof(exp_mul)), 0);
+
+ // modifies in14 and in7
+ sike_fprdc(in14, in7);
+ EXPECT_EQ(memcmp(exp_redc, in7, sizeof(exp_redc)), 0);
+}
+#endif // SUPPORTS_ABI_TEST && (X86_64 || AARCH64)
diff --git a/src/third_party/sike/utils.h b/src/third_party/sike/utils.h
new file mode 100644
index 00000000..cbc83293
--- /dev/null
+++ b/src/third_party/sike/utils.h
@@ -0,0 +1,145 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: internal header file for P434
+*********************************************************************************************/
+
+#ifndef UTILS_H_
+#define UTILS_H_
+
+#include <openssl/base.h>
+
+#include "../crypto/internal.h"
+#include "sike.h"
+
+// Conversion macro from number of bits to number of bytes
+#define BITS_TO_BYTES(nbits) (((nbits)+7)/8)
+
+// Bit size of the field
+#define BITS_FIELD 434
+// Byte size of the field
+#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD)
+// Number of 64-bit words of a 224-bit element
+#define NBITS_ORDER 224
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64)
+// Number of elements in Alice's strategy
+#define A_max 108
+// Number of elements in Bob's strategy
+#define B_max 137
+// Word size in bits
+#define RADIX sizeof(crypto_word_t)*8
+// Byte size of a limb
+#define LSZ sizeof(crypto_word_t)
+
+#if defined(OPENSSL_64_BIT)
+ // Number of words of a 434-bit field element
+ #define NWORDS_FIELD 7
+ // Number of "0" digits in the least significant part of p434 + 1
+ #define ZERO_WORDS 3
+ // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+ #define U64_TO_WORDS(x) UINT64_C(x)
+#else
+ // Number of words of a 434-bit field element
+ #define NWORDS_FIELD 14
+ // Number of "0" digits in the least significant part of p434 + 1
+ #define ZERO_WORDS 6
+ // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+ #define U64_TO_WORDS(x) \
+ (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
+#endif
+
+// Extended datatype support
+#if !defined(BORINGSSL_HAS_UINT128)
+ typedef uint64_t uint128_t[2];
+#endif
+
+// Helper macros for digit arithmetic; carry/borrow outputs are 1 (TRUE) if set, 0 (FALSE) otherwise
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));
+
+// Converts mask |x| to a bit: yields 1 if |x|==0xff..ff, otherwise (|x|==0) yields 0
+#define M2B(x) ((x)>>(RADIX-1))
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
+do { \
+ crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \
+ (sumOut) = (addend2) + tempReg; \
+ (carryOut) = M2B(constant_time_lt_w(tempReg, (crypto_word_t)(carryIn)) | \
+ constant_time_lt_w((sumOut), tempReg)); \
+} while(0)
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
+do { \
+ crypto_word_t tempReg = (minuend) - (subtrahend); \
+ crypto_word_t borrowReg = M2B(constant_time_lt_w((minuend), (subtrahend))); \
+ borrowReg |= ((borrowIn) & constant_time_is_zero_w(tempReg)); \
+ (differenceOut) = tempReg - (crypto_word_t)(borrowIn); \
+ (borrowOut) = borrowReg; \
+} while(0)
+
+/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
+ which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8).
+ Defines below are used to work around the bug, and provide a way
+ to initialize f2elm_t and point_proj_t structs.
+ Bug has been fixed in GCC6 (debian stretch).
+*/
+#define F2ELM_INIT {{ {0}, {0} }}
+#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}
+
+// Datatype for representing 434-bit field elements (448-bit max.)
+// Elements over GF(p434) are encoded in 55 octets in little endian format
+// (i.e., the least significant octet is located in the lowest memory address).
+typedef crypto_word_t felm_t[NWORDS_FIELD];
+
+// An element in F_{p^2} is composed of two coefficients from F_p, i.e.
+// Fp2 element = c0 + c1*i in F_{p^2}
+// Datatype for representing double-precision 2x434-bit field elements (448-bit max.)
+// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
+// encoded as {a, b}, with a in the lowest memory portion.
+typedef struct {
+ felm_t c0;
+ felm_t c1;
+} fp2;
+
+// Our F_{p^2} element type is a one-element array of the struct, so it is passed by pointer.
+typedef fp2 f2elm_t[1];
+
+// Datatype for representing double-precision 2x434-bit
+// field elements in contiguous memory.
+typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];
+
+// Constants used during SIKE computation.
+struct params_t {
+ // Stores a prime
+ const crypto_word_t prime[NWORDS_FIELD];
+ // Stores prime + 1
+ const crypto_word_t prime_p1[NWORDS_FIELD];
+ // Stores prime * 2
+ const crypto_word_t prime_x2[NWORDS_FIELD];
+ // Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
+ // in GF(prime^2), expressed in Montgomery representation
+ const crypto_word_t A_gen[6*NWORDS_FIELD];
+ // Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
+ // in GF(prime^2), expressed in Montgomery representation
+ const crypto_word_t B_gen[6*NWORDS_FIELD];
+ // Montgomery constant mont_R2 = (2^448)^2 mod prime
+ const crypto_word_t mont_R2[NWORDS_FIELD];
+ // Value 'one' in Montgomery representation
+ const crypto_word_t mont_one[NWORDS_FIELD];
+ // Value '6' in Montgomery representation
+ const crypto_word_t mont_six[NWORDS_FIELD];
+ // Fixed parameters for isogeny tree computation
+ const unsigned int A_strat[A_max-1];
+ const unsigned int B_strat[B_max-1];
+};
+
+// Point representation in projective XZ Montgomery coordinates.
+typedef struct {
+ f2elm_t X;
+ f2elm_t Z;
+} point_proj;
+typedef point_proj point_proj_t[1];
+
+#endif // UTILS_H_
diff --git a/src/third_party/wycheproof_testvectors/kwp_test.txt b/src/third_party/wycheproof_testvectors/kwp_test.txt
new file mode 100644
index 00000000..ef484910
--- /dev/null
+++ b/src/third_party/wycheproof_testvectors/kwp_test.txt
@@ -0,0 +1,1562 @@
+# Imported from Wycheproof's third_party/wycheproof_testvectors/kwp_test.json.
+# This file is generated by convert_wycheproof.go. Do not edit by hand.
+#
+# Algorithm: KWP
+# Generator version: 0.4.12
+
+[keySize = 128]
+
+# tcId = 1
+ct = 8cd63fa6788aa5edfa753fc87d645a672b14107c3b4519e7
+key = 6f67486d1e914419cb43c28509c7c1ea
+msg = 8dc0632d92ee0be4f740028410b08270
+result = valid
+
+# tcId = 2
+ct = e8bac475d1429034b32f9bdeec09a37f9b3704028f1e0270
+key = a0b17172bb296db7f5c869e9a36b5ce3
+msg = 615dd022d607c910f20178cbdf42060f
+result = valid
+
+# tcId = 3
+ct = 4c8bcd601b508ef399f71b841294497a4493c4a0014c0103
+key = 0e49d571c19b5250effd41d94bde39d6
+msg = f25e4de8caca363fd5f29442eb147b55
+result = valid
+
+# tcId = 4
+# wrapped key is longer than wrapping key
+ct = 9e4510cc84c4bd7abab0a8a5d7f1e6ff3e6777ca2dff9be7e223652239fe57d8
+key = e0e12959109103e30ae8b5684a22e662
+msg = dbb0f2bb2be912a20430972d9842ce3fd3b928e573e1ac8e
+result = acceptable
+
+# tcId = 5
+# wrapped key is longer than wrapping key
+ct = 8fbf39ae583bd4efa7a3e8f7b86870b34766ae7d8923a8e97b0cd289ad98cacb
+key = dd583d9f1059861430ec8b5d8a180e9b
+msg = f2e34f356362a31b51d6e02bcd333c9e6170494ca5ff5487
+result = acceptable
+
+# tcId = 6
+# wrapped key is longer than wrapping key
+ct = df2fbe5fa86418edc7b5b04a4aea724aca17e88cedc84ca8b0b0f048e64590cb
+key = faf5ccfae42b43cee2c5f0f3177a7c5d
+msg = 4e02084833660c463830483b36dab866c64c8cf7429cac3d
+result = acceptable
+
+# tcId = 7
+# wrapped key is longer than wrapping key
+ct = 67f8edf57f84ea0a35b35511d67d3f299c9984b2c07d3809c3d7f5f45091f1a8fbb937ed447677f6
+key = c2b9d23f2831ddcdeb456853d4014db9
+msg = f4cfea98e58b939cc859554385cf3a6c7f8217f728efb431c964786de8274907
+result = acceptable
+
+# tcId = 8
+# wrapped key is longer than wrapping key
+ct = 60d55a22ba7dbd7d8f317388e01e6be561d15d29f85c566f1259aa7e7dc3d5d30e0ef5f4c6267553
+key = 620a08f320cdedbf7ae551add348d95e
+msg = cec34eaf8e67e1ce619ddfc309531c42f16033a7e2cbc4f5eb3a548164e9b291
+result = acceptable
+
+# tcId = 9
+# wrapped key is longer than wrapping key
+ct = d78a8291108f0f2d8be0ec10ec08240bf4d3021f0a5ed7faba0748db73762f34a0504bd373212df2
+key = ed089ac274f8c7cea2415671a94b5e53
+msg = 6065e41df14daeeefacac5daeb7674cdc9c1f686013b797153e80ef215893299
+result = acceptable
+
+# tcId = 10
+# Round counter overflows 256
+ct = 9341221aca1c647e2afc2bdd9cf4ed6e60058eb0a84cb3fc2daf3a87d9fad0a1f8268b27aaf7201d705e72f7e2240309ad98742094e3f1c99b7faa9ae181b441f5004b8bc93cdd4160d403d0884749a3c379d47c112a45788c05c2106c98f59758d393e04c880691b0e8683a12df7f876e1e1f68b4acbae9cc8310b34d59ccf4617cee72e845df1e0e32e5b4938f2923d55f1bb5156dd8c787401e6ef241ea4073d0a59ddfcd7a53db5d89b480b030cfb9084ea8479b964f090bb612d5251eee9ef8870a45f1e76fd24abdd9b350fe148b15a4cfeb032d57b5743b3548a7ce9eec8e21a31ce832530edfd1cffd9bb37369e6463c6b373ab60d80b0a2677e92e658f7daf2a5234b7312bf2d967cd0bc809e9be2f706ae63bd632fd611f161e48ee19677f3243aa0e91f6651a1cef62feff7a72eedf830bae1dc6d89e55ccb5e6f97889c6266f7d3f2eb0aea6c8c42200febccc5916825368adc87e04e835de06fd7bc2805c219e7f0b6252563f29969b1f30cfa1a8da4b90ae7534fb849d068a7e77de7360f8af173
+key = b6121acad51038e11873aaa7e6c7be06
+msg = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+result = acceptable
+
+# tcId = 11
+# wrapping small key
+ct = a65959a600000000
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg =
+result = acceptable
+
+# tcId = 12
+# wrapping small key
+ct = 09bcbab50b8dd45ad83412e2919030d3
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 4c
+result = acceptable
+
+# tcId = 13
+# wrapping small key
+ct = 0cbe852cdce4f0b5333366f446b2b1c5
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = be52
+result = acceptable
+
+# tcId = 14
+# wrapping small key
+ct = a9dc66e03435ab3d4f97ff66f2c911a3
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 2d5244
+result = acceptable
+
+# tcId = 15
+# wrapping small key
+ct = 1b970c8ecb4187447e60e6083da03086
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 6c3d3b4c
+result = acceptable
+
+# tcId = 16
+# wrapping small key
+ct = 0344f7b34ab8ef28aaa843f276b0b3d5
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 0412ab3ec6
+result = acceptable
+
+# tcId = 17
+# wrapping small key
+ct = 17356c7148334ca1a24aab7e82a66e18
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 8ae08938929c
+result = acceptable
+
+# tcId = 18
+# wrapping small key
+ct = 1db7510a55591a455d9f8167e6db3c88
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 7c8dfbb68d72af
+result = acceptable
+
+# tcId = 19
+# wrapping small key
+ct = 936fe58b629ea6ec158145218f2361c7
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 536f8f83b64771c1
+result = acceptable
+
+# tcId = 20
+# wrapping small key
+ct = 6787816804b3127d0ca4073f1dba5c4d3db1ec9c227e6556
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 8571f282b18b64ec5e
+result = acceptable
+
+# tcId = 21
+# wrapping small key
+ct = 34131c3bfcc48af15eea8672e52927b462f81d5ba0e6260f
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 8ada889862813e364c4d
+result = acceptable
+
+# tcId = 22
+# wrapping small key
+ct = 4d1ec9287cd4dd378b9aefee79d4ed35bcb98ad9fa9fe529
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = f9c56e8058758a5c7c2baa
+result = acceptable
+
+# tcId = 23
+# wrapping small key
+ct = 7209f5b6bd5d4916f4995d280e9aa89edd5e96e3c9283ad2
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 7c7dbc83fa62206a521ed4ad
+result = acceptable
+
+# tcId = 24
+# wrapping small key
+ct = d85a1efc6ab3a40948f723d9810a5deb019b3ce0208a0d94
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = a6614daf00df6d14f50388bad5
+result = acceptable
+
+# tcId = 25
+# wrapping small key
+ct = 43509b5df3688b6e44c1a994592f4c03da34712f886e63d5
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 450580a47d7008321496bfb82f48
+result = acceptable
+
+# tcId = 26
+# wrapping small key
+ct = 16e369351c40f220d3fb1197f35da652a3a40ca3b1e99bfb
+key = 1abf4b7fa2bb62a78f09ddab04625dca
+msg = 9efd21e13855eea8907afdcd8935f4
+result = acceptable
+
+# tcId = 27
+# Modified IV
+ct = 82af032f5389caa503147d2825336eab84816fb6f8ae6df4
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 28
+# Modified IV
+ct = 4e00a9eeef87eb6d7be4ec46204d94006c216d5177d2a83c
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 29
+# Modified IV
+ct = d3dc6c3b4707a08039d621879caf419b9895482fff7bdcd0
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 30
+# Modified IV
+ct = 09d3bfc3c9c5af2b2951b06406f7ea4d84e9c37402637e2c
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 31
+# Modified IV
+ct = 3396679a4d87caf7ce7eb4707ba1c6526728f5a973191713
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 32
+# Modified IV
+ct = ec637d90d945e92929c1c873d9aa9c47bc7b172237319d15
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 33
+# Modified IV
+ct = 748f373d48d8590e2216b294b9ef94860dbb6b0b0ab625c5
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 34
+# Modified IV
+ct = 61d7c584197f257caf2583e444896f1d3ba12509b1ef725b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 35
+# Modified IV
+ct = 7f8cda973fe58b484b120fc710b520c5636057629795f89a
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 36
+# Modified IV
+ct = ccea198029edb9d848d6ca76667b666b1dbebd1e4b1faa8d
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 37
+# Modified IV
+ct = ee08cb9d20a98b88b2d8f0e39acf34219d105dc14afbe364
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 38
+# Modified IV
+ct = 6782992bf8cff068cf41341dd2ca04adedea92e846f74411
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 39
+# Modified IV
+ct = 7ed35d0c08042dd56bb5df78056ecd21b8c797d36f57aaec
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 40
+# Modified IV
+ct = 37e3b4cefee648766a8efe73d6af12812eded603ab7141bb
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 41
+# Modified IV
+ct = fe73777d8992e07eef0d053ad5ec0bf8243fc7e0bc2b405b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 42
+# Modified IV
+ct = 39292c91b6b826d47d502043c3ba4f41e2ce32960a0291b5
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 43
+# Modified IV
+ct = 36ef8fc13d0f1f5745e3939877b62b8ecba2f5f0b19f9e90
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 44
+# Modified IV
+ct = 7255c4eacb4105a68095e9e5b5a4bd8f9623a0da5c6fc230
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 45
+# Modified IV
+ct = ea26eec89a46ff1a628834c7247a8e4e45d8a8d3229e26cc
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 46
+# Modified IV
+ct = 508593fa85a8effd27c8a225981978fcec6e992eb488c9c2
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 47
+# Modified IV
+ct = b8a4cb22f15529864d4ced8e8abae69752a9045a084dfc3f
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 48
+# Modified IV
+ct = a0a6bf5e47e89706932b1057b680c3c81dc4d9d0b4f9153b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 49
+# Modified IV
+ct = 11f3af4ed30e77520517c880f1d0c272a89a968dc697cb5a
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 50
+# Modified IV
+ct = 6fc912a0bda73bacfa93db4002f18f349fa30f22f7a95ab9
+key = 4f710eb6b5e28703becfc3dc52fa8bc1
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 51
+# RFC 3349 padding
+ct = 3731038571c35f7dcc55e48892de353e54c079b89774bbfd
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 000102030405060708090a0b0c0d0e0f
+result = invalid
+
+# tcId = 52
+# Invalid encryption
+ct = d85c6bfd092df1aeae5a548e47aa7681
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0001020304050607
+result = invalid
+
+# tcId = 53
+# padding too long
+ct = 7a92427387f5587ee825d1ffa011c40286844ecdadce31cd9678338694ea2682
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 54
+# padding too long
+ct = a437d354606ae752894feb62c8def7d17046d8e47f9aed755fba48b3a3009e3ff67d34e26a779064
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0000000000000000000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 55
+# incorrectly encoded length
+ct = e8d240d64f16d1522ae2ded42ced257dfec158ff2fe1467d
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 56
+# length = 2**32-1
+ct = 6d1bfda356b7b954e7aaccc6df953322f75be95947b02b30
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 57
+# length = 2**32-1
+ct = 17dbf878ef4076cfcaba5f81d7b123d7
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0000000000000000
+result = invalid
+
+# tcId = 58
+# length = 2**31-1
+ct = 75c23e253478037802fae0f86af9c78d4e4d9be0c3bff89f
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 59
+# length = 2**31 + 16
+ct = 55717658c6a35e15ee36c66cce91083b63091f51525c0b51
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 60
+# data is incorrectly padded
+ct = 8ede88a52ccb8a6d617456955a9f04c94d87696125ded87eebe3e97e185496d9
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = ffffffffffffffffffffffffffffffffffffffffffffffff
+result = invalid
+
+# tcId = 61
+# data is incorrectly padded
+ct = 5b4a8f1abffa51676ac8b5ddf9366c12
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0001020304050607
+result = invalid
+
+# tcId = 62
+# length = 0
+ct = 205cc6dd9592da0ebff6b4b48a0c450eeaeb11a60d33f387
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 63
+# RFC 3349 padding with incorrect size
+ct = 908a68b0d2054e199220d37c34a2e136
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0001020304050607
+result = invalid
+
+# tcId = 64
+# length = 9
+ct = f84bdb15045cee3a8a0f3ed2f07c1771
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0000000000000000
+result = invalid
+
+# tcId = 65
+# length = 16
+ct = 7592b1ee6ee92c9467db366adcfa65bb
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0000000000000000
+result = invalid
+
+# tcId = 66
+# length = 2**31 + 8
+ct = db93a1db3b5babc80a304d527682c1ef
+key = 48a53c11ef2d727db7eb9a834b134ea9
+msg = 0000000000000000
+result = invalid
+
+[keySize = 192]
+
+# tcId = 67
+ct = 5c117a678223cfe5ee691503061e7ab1e5f720e005171b32
+key = f75a2f49a630c7dc91626b00ce029f0bd2981d7c74a93ebe
+msg = 9adbc00c710b1101bdf6a4ed65b32d72
+result = valid
+
+# tcId = 68
+ct = 6a7f9e03b6f379c56da3a56d8f32eba515454a91fd417449
+key = b713f6b7814f98894d7b153974684359f1460213eb74be68
+msg = 78585f0c49922e82caf17ebc3721b4db
+result = valid
+
+# tcId = 69
+ct = 764097f5ee8236bc0d93bbcea139a652f4b211cc33a61ac9
+key = 13ecf423211caa334ba6db37259a535c20de8ad10fc8c432
+msg = 4fc75d0f221e22408a37e11265d49a05
+result = valid
+
+# tcId = 70
+ct = 04b83ec803a75bbcb2f87fc6f488a4ccc1827b412483070eed195b6f0048ccbe
+key = 4417fbbea51bdd91818d74051957dd70e135c5cf3732bdf1
+msg = f5357da9f8fd4a1190f36e9fa09a90fcf14d87d62332f1a5
+result = valid
+
+# tcId = 71
+ct = 46ab71f032cb1ccbcc7447a5183574268c0167a26a93fe8422bf284417aa93ea
+key = b3f26d8a22fdd61f709841231fbde695b3f28dddced6d41e
+msg = 0d0af955d2e3829cc3d643219b301e64e0510dfbc428119a
+result = valid
+
+# tcId = 72
+ct = 47ca298ee47b1b755a499129347e11e7a25754ccb6c2689e8eff270e98c81d18
+key = f70cfb262c729a18206c8afd74356ec7e049d10b44a6e000
+msg = 241cedfa64c4e7bec541a2eb4c368269e0f0ddebc58267ea
+result = valid
+
+# tcId = 73
+# wrapped key is longer than wrapping key
+ct = ecac4c91758e1ae7bb010c34f4c5f99a3d728b9fa92cb778d3fe80d777a20d3de85ef46e7a0c6a6a
+key = 1639f9f81e53e2eeb677a249e5eced3af108971301601a7b
+msg = ec3c6a1f1a9585327fe658490c74635e5300876da5846a629398984fb551d691
+result = acceptable
+
+# tcId = 74
+# wrapped key is longer than wrapping key
+ct = 39b7326a44eaed08bffbd4aeaf3e2c3f899c1fd049384ed7b3eb92b788c6449acd6385f0bb18cf28
+key = 1f22d5658aa685b8ba8659dc342880d5b2399e6a815005b0
+msg = 50be4c1b2f29a63f44d7fc63737f600f0194ea3fb36e173d2ddd19f218656380
+result = acceptable
+
+# tcId = 75
+# wrapped key is longer than wrapping key
+ct = 3d2e9f39c7b13e9585227c4344fbe596f92b002456616f137deacc6a8c941649ce294bb2695c1807
+key = 3a2f4aa50441954bba5a1836294ce071f9296b23dbed6771
+msg = 65da02ff21b483a1e39575490b4319e84ae0299f1f00b3859fbe2e74b3ec2aaf
+result = acceptable
+
+# tcId = 76
+# Round counter overflows 256
+ct = d6aacfb52c26baae78c2f54259a4e4168f817064344e2ba8fbfa7fae9f1fd69bd5bc5c1e20a6101b4a7119cbce028e25a9e93d29ee260c4e609baedee788411c2afe60218ce1b0d28b9c29b941251fdcbac3009d59040a0337b8b4a3a020c6d8f310cba63db046d8f36b64c9092e75cee463fc7692ef56bed395c4579da0ecb02129e45ad8a7f116aac6170204888e40693f017a6a0a7dd3962004e60db3a9b6c8b7614a467ccb799bce1ba83f5c0921f1e52bb3909bc0486ec0eaea736498f3ba520a519c3ddf491307958620b737613417b15b438b80b43189baa455031f5771502002ea170c767b33d247feebce62e606f2262537f85f18d1951cc75cedef291c6a501cb1778586249b58156eb8d7283a3f508ee8bcc1206d77bbd6892fe74b865bfc02a8f07223087a6c1e50a41b7cf5f6ee04bd07766b2e5b34c4a7666b0ce06f670e6434a59fb74e0df36c91d94e5e8b721e53e09b6f6504c5d515492a373fcc348a63122cc6e4716e0e1a543d038c6f7731199f691780a8a655cca6718e3dc56e815b3669
+key = b6121acad51038e11873aaa7e6c7be06f93826b74fec0ea1
+msg = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+result = acceptable
+
+# tcId = 77
+# wrapping small key
+ct = a65959a600000000
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg =
+result = acceptable
+
+# tcId = 78
+# wrapping small key
+ct = 52c7f388d0d4237afaa29f2b94723475
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = a3
+result = acceptable
+
+# tcId = 79
+# wrapping small key
+ct = 833431ce8799be69b36aafe3f38d9dac
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 594b
+result = acceptable
+
+# tcId = 80
+# wrapping small key
+ct = 31674f46b989f6ead582c70dedc8c6b9
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 72ab34
+result = acceptable
+
+# tcId = 81
+# wrapping small key
+ct = 80535172d2a498aa31601d70fdca9dea
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = d4d9460f
+result = acceptable
+
+# tcId = 82
+# wrapping small key
+ct = 56232300dd7b2a71d2328b6df47af8e3
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 643972e552
+result = acceptable
+
+# tcId = 83
+# wrapping small key
+ct = e27e08efe39adbbad8d300b87be2c258
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = f3cdb73d2561
+result = acceptable
+
+# tcId = 84
+# wrapping small key
+ct = 8f90942cdab33e58b24a23ad7efb7538
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 7b0b53b6429e14
+result = acceptable
+
+# tcId = 85
+# wrapping small key
+ct = 0ebaf23c858015d3bda5b8d908db6049
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 6b2393773e6d1378
+result = acceptable
+
+# tcId = 86
+# wrapping small key
+ct = d56f89977b8eff511158edad6b993007189e5a4b8c0e2faf
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 2c52d6639e769960e8
+result = acceptable
+
+# tcId = 87
+# wrapping small key
+ct = dd889475a76733849f59bed49a15d4315bdb5ba00dc63470
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 707c9356216d69c69048
+result = acceptable
+
+# tcId = 88
+# wrapping small key
+ct = 1a9b3369239b0f40a8dc5bd8d965caf7431445799337b99b
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 615f6fa79e1847e7359a8a
+result = acceptable
+
+# tcId = 89
+# wrapping small key
+ct = 5232f8f6679a17d3303b0bd72b06b56b5089e80372dc295b
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 7f5e999168ec60624426cbb1
+result = acceptable
+
+# tcId = 90
+# wrapping small key
+ct = e5544361c60980f3d38f2d8820a150f48f49ef3f9184b29f
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 3f93aaf4463775baf6c0c975ae
+result = acceptable
+
+# tcId = 91
+# wrapping small key
+ct = 55396065905915ec914b8d1efbf471e37d283fc2c1496b49
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = fefcf10c976309b2beb085771e50
+result = acceptable
+
+# tcId = 92
+# wrapping small key
+ct = d90376be302a24c541bd6d96094f0025e3d73888391b4306
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b
+msg = 6854354d0099f7eff740b0587140b3
+result = acceptable
+
+# tcId = 93
+# Modified IV
+ct = eee27510be39cc88379459420f3773642a423ac1ff0cfb84
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 94
+# Modified IV
+ct = 765df3fa1aca6f13268ba79f8659807049a313a0308b643e
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 95
+# Modified IV
+ct = 71346c17a2718cb7c357e3af2b2d0c3e29b7e02317926746
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 96
+# Modified IV
+ct = 55fd49ba081fdf72896068c5a968e2b3c4a473786a2e12c2
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 97
+# Modified IV
+ct = 133c66fcbf0e9d5139eff3fcb494b672d72bb622d7015c4b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 98
+# Modified IV
+ct = 8439244f27470e5f1f294cfa22ef5412675d7fbbd92ff016
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 99
+# Modified IV
+ct = 4265bdb7d8ea30d9a51e5f48b7ac5487e0c95f154ea8baeb
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 100
+# Modified IV
+ct = 31afcca8ff2b8806408c3460181ee5a96bbaf51d133211be
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 101
+# Modified IV
+ct = 196f2a6eccb5368fe6a3f2fa0874d8fc9b3b52484e2d6351
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 102
+# Modified IV
+ct = 8dc73d363fdb32f6e0ff830c2a48db5815f66d0922694c74
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 103
+# Modified IV
+ct = e90022b9da998b4a30c91c1bd1a1f8ca05a52432867e5e78
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 104
+# Modified IV
+ct = c9898a1b70bd718df45f1f3eca82eab1eaddb8ed7f2380dc
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 105
+# Modified IV
+ct = cc3f2cd6476eddbbfdc801b61174301688554f3db54c2903
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 106
+# Modified IV
+ct = 23e15705e7b00d82bd052f0e0135ab7ac0dcce471ff2f1a7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 107
+# Modified IV
+ct = b8e2862c0f9eae4f44ad99496e3ed62b3b9c4ce7ab5afb74
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 108
+# Modified IV
+ct = 66d8a7769d81421efda456992f6c26cb17665fe080b0160e
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 109
+# Modified IV
+ct = a6a28bceb91551a395369ff09370658cc92b092855f417aa
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 110
+# Modified IV
+ct = 03ff601cf12b432078a2185590fb5d01e3441cf084bcb04a
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 111
+# Modified IV
+ct = e250d358d16d9fd20ad80a99656509229dca391aad3798f0
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 112
+# Modified IV
+ct = 643a17860b116ec74089bc574685a6328a3d7a07cd18b520
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 113
+# Modified IV
+ct = 3e86e8128904f753c0f3fe3401ba36672966567725c4726c
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 114
+# Modified IV
+ct = d4b8cc849176b8344b0849490143d3512915171bd7d5759e
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 115
+# Modified IV
+ct = f84e0e6ff64e0b27b8b59b5b77c223023f0fea95433864ec
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 116
+# Modified IV
+ct = 4030b4b0e9c1b1ce8e52f6bdb48088e65b05844307989c8b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 117
+# Modified IV
+ct = ccb3b36c26b2d901b7f0765362d992b2d5089c2a7559b195becbe173780352fa
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 118
+# Modified IV
+ct = 4e5fc8dccaeec9b1c8a606a2bd7d7201eede62b9c2e939a5aba663a6a040e361
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 119
+# Modified IV
+ct = af21f5e7f15a63c8ea6001cf024f281e7f44aedd68954564fc2bd146e96d793a
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 120
+# Modified IV
+ct = 3a4f571ffbf761d3f7d413172ee1e4ae2862baacfd5ab66dc685b9af8b70b538
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 121
+# Modified IV
+ct = 273de386d5fef497f9487afd54c1c0fae8aacabf2af465caf352e2300d29266b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 122
+# Modified IV
+ct = 16511743dc44199cee1dbf5045141b075f01ee13326c9faf2c74b7c99791830f
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 123
+# Modified IV
+ct = 370f92db00f7fc8a0e654318a5b3ff89a604034f421339201d79e0ec4d6088de
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 124
+# Modified IV
+ct = e3edd0e84832f3615f6deefb444de3b9ec527741686029db91de0bb9b2a5c05d
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 125
+# RFC 3349 padding
+ct = 36ee480138edf11e144efcddd24d2c121749da6e4eab17fe
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 000102030405060708090a0b0c0d0e0f
+result = invalid
+
+# tcId = 126
+# Invalid encryption
+ct = 166beb49e97a4a9cc7b0ccf441ec15b5
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0001020304050607
+result = invalid
+
+# tcId = 127
+# padding too long
+ct = 74ff3070a0a08471c001febb95a890f35159a9fe263719e40c2332ce5c58fada
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 128
+# padding too long
+ct = 4f0b38eb328d1227b1e17c103a44a373ff67cee953c59eea26117947b5d3ef8932c8858b4f9fb47c
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0000000000000000000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 129
+# incorrectly encoded length
+ct = 775dcabab9e4be8fd9963a4dc7a1447ef82888403882bdb6
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 130
+# length = 2**32-1
+ct = 669803237fa10eabb4d2c6ad85bd9f7df5f4a33340eb0ce9
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 131
+# length = 2**32-1
+ct = c788504d786f5c21b6671bf190657301
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0000000000000000
+result = invalid
+
+# tcId = 132
+# length = 2**31-1
+ct = d079f60d3258f5e695d1a73db008ef38516b713eca2c0eaf
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 133
+# length = 2**31 + 16
+ct = f1ae4b8865013b0fc63b463e664cec3c6031f61f2de82f43
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 134
+# data is incorrectly padded
+ct = 8874e1b6e15e3ef6c461411a5f5ad0c8b05368cd5b3ee39b2b413d18a4eebfc9
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = ffffffffffffffffffffffffffffffffffffffffffffffff
+result = invalid
+
+# tcId = 135
+# data is incorrectly padded
+ct = 890a3dab8439bb73b14c6e99c34f0b0e
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0001020304050607
+result = invalid
+
+# tcId = 136
+# length = 0
+ct = d4f633aedeb89e349a98738b00ee42c90d583b16e986e49f
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 137
+# RFC 3349 padding with incorrect size
+ct = b8b2a5b1d3280dcb4daeeed43f36509b
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0001020304050607
+result = invalid
+
+# tcId = 138
+# length = 9
+ct = 4429cf64251d8a54a9d1389c01c30900
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0000000000000000
+result = invalid
+
+# tcId = 139
+# length = 16
+ct = e5634eca10372c867c7f91ee813ec3f3
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0000000000000000
+result = invalid
+
+# tcId = 140
+# length = 2**31 + 8
+ct = 9e517d4d0142e1544ba1e7419a696c21
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702e
+msg = 0000000000000000
+result = invalid
+
+# tcId = 141
+# RFC 3394
+ct = 138bdeaa9b8fa7fc61f97742e72248ee5ae6ae5360d1ae6a5f54f373fa543b6a
+key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8
+msg = c37b7e6492584340bed12207808941155068f738
+result = valid
+
+# tcId = 142
+# RFC 3394
+ct = afbeb0f07dfbf5419200f2ccb50bb24f
+key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8
+msg = 466f7250617369
+result = valid
+
+[keySize = 256]
+
+# tcId = 143
+ct = e3eab96d9a2fda12f9e252053aff15e753e5ea6f5172c92b
+key = fce0429c610658ef8e7cfb0154c51de2239a8a317f5af5b6714f985fb5c4d75c
+msg = 287326b5ed0078e7ca0164d748f667e7
+result = valid
+
+# tcId = 144
+ct = 9d2b42fb2fdb92c89fb0c3bcd9e1600d3334b4e35e791369
+key = 0dda6da5123e2c37c6fa16ba0d334cd01acd652f8994211751dfab4faac2fc22
+msg = b40b6828729b456322a8d065abc0d081
+result = valid
+
+# tcId = 145
+ct = 5291e05abd55f5886850855e3f9f2f576b101acc222d6766
+key = d6925914cd06308f81ad91e23073593d99d4e50351b20eb2a8d1a1ac4ced6588
+msg = 037b27b3dc95b19d15bd4091e320bfe1
+result = valid
+
+# tcId = 146
+ct = 4b1220525c537aec30ebcd562b694b4e9e2ccd819de22ef608b5d8090779d9de
+key = 07518a82cbc8da1dcec55f3763a206d277487abd03cedd0b8bef9ee2fb157121
+msg = faa4664d79fce3c7d2fdd462f6c1c423c2f8e6b69be2e071
+result = valid
+
+# tcId = 147
+ct = 67b2cbd68f6a208d647bdc5af7d0bccf6711a9e8fd0d9434363006addd4b9696
+key = ea46991d4e71f53dd624e7fe7fde11944a7c5942d232369b8065d42b8cd2dde1
+msg = dffc5cf1dd5411d015d84601fa38df5effe885c7f26a4825
+result = valid
+
+# tcId = 148
+ct = cfdbbd95f187508a488fe017c5e5d5a5975b68441d520e0e931922388e28784c
+key = fdcfa902c6f222f527af84da533b14b52e2615da3a89d1d35708b0cd49f60d87
+msg = 966b07047354966a703e79607b556032f4f596b7f9206f05
+result = valid
+
+# tcId = 149
+ct = b63b7e0fec7e315816233db6758fd3e744b9f6a40862bdf866487e53bcb950d8b2649269e51b4475
+key = 38e1b1d075d9d852b9a6c01c8ff6965af01bac457a4e339ae3e1d7b2ffacc0cd
+msg = 80ad6820f1c90981e2ca42b817a345c1179d0a11d8e23a8adc0505e13d87295a
+result = valid
+
+# tcId = 150
+ct = 837cfc316b49299edaf427e0988020ee876204b29d847669daab72c8660b0d860e9de3bd851198ff
+key = c641f1689d81caa8ba37d895272240664054ed974cfffc40e6c5c0cad1b916c7
+msg = 3fd0ba19955e46749f54d88e99d080b7339d588fe612ec0f4021ca3ca2104270
+result = valid
+
+# tcId = 151
+ct = 0e9e2e9aa34bbf973d67bc534ac86fc5b5a5f9da5f026866177894ec6077a5c84501510e1bf4afb3
+key = aa0ab9d68ed4a04e723f81b44c0c88d0bcde7a80cfd476eb4b8836d9aa01ec4c
+msg = 57faa8766f6d6a0aa1cf643f857c150df5b31303b50af480e21c4b5e8c8a15d5
+result = valid
+
+# tcId = 152
+# Round counter overflows 256
+ct = 1c6b7e4003384f071bf29baea9098ad81da8e9862909329f52793b35d592c10dba15aa89400ea6403df8dcaffd0dbf5606303f109f79ad700ed5d5ad4e59950ce9ce5296c9d186a0df441973d1835f9ac000ad1a6797875c3a03161e9e3f5ea464032e407854eadca5a9e7a386bb0d29253e3804adefd8c0402cc8c40ac7f9041429cc0bb77a405b284baa2dae764ea09c654c0a82f2c5724221ba44e341503d3103dbc393c7702182f8cc2762ddbc873b7f84197709886a4b5df5b04ff9d21b79b50904af3c32128dfb9cde94fe1254d981e6ce3acfda82db1fa2badbccd2d29052a04a69ce1f5652f30496ea57edc7e3e885dd4a35ca15aba602bb4c888a8064da94c2ac5c12c11f608810af46fbb49c3e8f8771ff661f8d8dccd163d0c4a401b8b9aa74e68a56011cf78d21dc7541a974f9dad5ae27f8a26d1b0e76be2f86c6a21e9d1c2b5df3c8878a8bcae143b3af1f082afc52616eeadd2232926597b245d394931e02e493b0bc27a92d013e111694cac2c5a2a46e008a8498b5c31bb5ec35a4e9957e365d
+key = b6121acad51038e11873aaa7e6c7be06f93826b74fec0ea1c02f9981ed49d16a
+msg = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+result = acceptable
+
+# tcId = 153
+# wrapping small key
+ct = a65959a600000000
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg =
+result = acceptable
+
+# tcId = 154
+# wrapping small key
+ct = 06c1e65ac0f385b4e8c400d229f39422
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = ae
+result = acceptable
+
+# tcId = 155
+# wrapping small key
+ct = c98da5936a1313eba1a6773b8060ea5e
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = c548
+result = acceptable
+
+# tcId = 156
+# wrapping small key
+ct = b2a77d9b837e87cdb7391e1df7cdaf14
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = f713b9
+result = acceptable
+
+# tcId = 157
+# wrapping small key
+ct = d8ecf20191f75aa36686298bfa5022ab
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = f375cbf7
+result = acceptable
+
+# tcId = 158
+# wrapping small key
+ct = 077362f50356fc7c54c70f9cb4306f7d
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = d9445094b1
+result = acceptable
+
+# tcId = 159
+# wrapping small key
+ct = a4bd6a116ad88a52aae3f0c0cb893f9b
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = fab43e91ae15
+result = acceptable
+
+# tcId = 160
+# wrapping small key
+ct = 68a52de00ec0f1ebbedc38fee6be0c23
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = 90735025797bd2
+result = acceptable
+
+# tcId = 161
+# wrapping small key
+ct = 3a6746052a1744cfe7e2f36dafc4042d
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = e43f5e4e123a03c4
+result = acceptable
+
+# tcId = 162
+# wrapping small key
+ct = db7e73da22219e1baac0f4e955c3db2b900b5d3078f94b59
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = 1723eb9d000916996a
+result = acceptable
+
+# tcId = 163
+# wrapping small key
+ct = f77ec14a010777f1f1071808f285c1c00b4e9420f0e8bf48
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = 8b18daecde14b8472ffd
+result = acceptable
+
+# tcId = 164
+# wrapping small key
+ct = 6b40d4f0863581a7d0365ad477568bfad94f8bf134984838
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = e5bd6fbacbf3ef0d40c884
+result = acceptable
+
+# tcId = 165
+# wrapping small key
+ct = 660f645b02405a18f7225b68c0a09a949b2b5ba784922cfe
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = b3be5e5397df5f46b099e821
+result = acceptable
+
+# tcId = 166
+# wrapping small key
+ct = 6bea6bf57601bf063873f47ec3572cfb9cfb595d8bdb5e97
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = 4cdd960cabcf8aaf69c37da1d3
+result = acceptable
+
+# tcId = 167
+# wrapping small key
+ct = b631292536aaf02d829cc6d3c39e5a5cd76240889e9d51d0
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = da29e0889cf98742612e0326300b
+result = acceptable
+
+# tcId = 168
+# wrapping small key
+ct = de497acf18a177a3a9b3d8da46d74dfa58dcc537a3a95323
+key = 1abf4b7fa2bb62a78f09ddab04625dcacdd9e551d1a69b6b162baa53d2700093
+msg = 72aaee126a822184806c7d22eed66b
+result = acceptable
+
+# tcId = 169
+# Modified IV
+ct = aef4d2357a8fc5c3b4a80a15ed49781d3a82c98eb78c9180
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 170
+# Modified IV
+ct = 6eaefd5193f0725fea545077a430860663901979f0b6f4a3
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 171
+# Modified IV
+ct = f9ded536c1ae9c680f7d9c4b91a566a07b1628e9b9f4fccd
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 172
+# Modified IV
+ct = 443526477c779a329ded0b230307afa64fdc10dfc86414dd
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 173
+# Modified IV
+ct = 21ba79f3b423a66e7baad86fe49786e07a33dfdf227687e9
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 174
+# Modified IV
+ct = 3e65dbacaae556fa18bd192035cd55958adeac30e5ca7b3b
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 175
+# Modified IV
+ct = de2054883b00f81ff68e42b7ff1c05ef5faaf75b2bb14004
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 176
+# Modified IV
+ct = 2aa3c6ba891d1211677d59f886cc6d05698243d10dc189f7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae37
+result = invalid
+
+# tcId = 177
+# Modified IV
+ct = 9b1e7d6caf42bb3a15530f2387ed7329310ba76e1852566a
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 178
+# Modified IV
+ct = daf6a9f5e4b4985fcd4815bf6298a3039bcb32327b0876ff
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 179
+# Modified IV
+ct = 14c4079399721142fd5fce26e9417064c7e0201fb7b5255c
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 180
+# Modified IV
+ct = f48a30b8691a2a80dd79c355c281addf779bfed8971e3ce4
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 181
+# Modified IV
+ct = 248f867430ffc954b494c936a3ef815b1754009928aaf0c4
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 182
+# Modified IV
+ct = dcaa88dad9b03e59a3ac8350239824368004e2ca616c15d7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 183
+# Modified IV
+ct = a6cc8470192687ec9a31258ddb73084005784475f3442705
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 184
+# Modified IV
+ct = 0527ab5408b4f1484b27f98641511143ab88783688256815
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070
+result = invalid
+
+# tcId = 185
+# Modified IV
+ct = 7fd3ad3aee0545da1ed3a54d5a198a2c76cf8290c011c042
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 186
+# Modified IV
+ct = a24e94c12b2e6b776c8febe9179521beae0cfbd507d358b4
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 187
+# Modified IV
+ct = 9395b071fa3d9908b2e1b349bf7cd6a1cfc86b979c8c73cd
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 188
+# Modified IV
+ct = 1eb452770bc0f26a3576b604bf5ac72f714fc468c357eba7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 189
+# Modified IV
+ct = b42bcb4161f40b30f3d2f740f43e441d3c9a39613914f1c6
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 190
+# Modified IV
+ct = f3d76dd320e5f1b3f85b8f73a9ebcfabfb8346daafaf36e6
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 191
+# Modified IV
+ct = b8e26164496942f44f16751096fb47952ec478bb288e72a1
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 192
+# Modified IV
+ct = fa783b3aca0ec1e677378f23ebe937776fa590ecc6b01392
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5
+result = invalid
+
+# tcId = 193
+# Modified IV
+ct = 8b011408049eab81cc185796b9636982c1ad28e940e5c35ab1219434c23e8c59
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 194
+# Modified IV
+ct = 08db2f06aa2400d4cc1113b1c9e3ba1b39e3e26a84918f9266796c426c166428
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 195
+# Modified IV
+ct = 3114404be000ee167b65dd3cfae3b10c50dffe1df864b5e52a2805f0c80021c0
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 196
+# Modified IV
+ct = 405ae5bdeff8b05d28ea55900b8e81dc789d532ec3fc457730819e762172f751
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 197
+# Modified IV
+ct = 7c19e66d21c0f1409ee6f03a36ab6ba532349e2567200b95d7f5012b2b7e5d33
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 198
+# Modified IV
+ct = 955ac67d6e496b9b93a4dda8f6e65e668f1326b256ee146a7647ba18deee7986
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 199
+# Modified IV
+ct = c8600aa18be27279493fd68c84130c8bc328b0f6821e01e892b6c2dc1c005270
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 200
+# Modified IV
+ct = 492566e0dc539e234b08b95fb23594a6d14f59fa4367799495c2e7f2993135ec
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1
+result = invalid
+
+# tcId = 201
+# Modified IV
+ct = 8c5c2ea18125a03d15d2a624c9bfcccdf53709a89ae03d5728c98943b13df72c6f02fc8e1cfcdfa7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 202
+# Modified IV
+ct = 8836c5cb2eec2ca2541b18c1259933ebd601bd6763d9f7cebf06ed6abbe37d455aca13a2db87d111
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 203
+# Modified IV
+ct = 2554e0faf721d77f7dfadaaa90b70c2f242f93bdc4f876cd058a86ccfff33f8fd88736997f505d98
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 204
+# Modified IV
+ct = 53ee4c8f03212b389f5bc2b26bc898deb91a457f258a22028a688919e12c4da23090c26b5c9ff692
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 205
+# Modified IV
+ct = 3a63b0283ec071a4d4c32b0f30b384eccb3cd8d7fb12de6806e12fef5da82a7a39aad8128c3e5915
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 206
+# Modified IV
+ct = d1cfaaa9adc25f948c0c4720967b01488e06d3dfc5622b5de38a722798d4a3a44fa6194a92c5ede7
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 207
+# Modified IV
+ct = 251a71511a4e73d1469a051fd88fa78cae96547fd8ca8e323b05d8717cdcd239292c7bbe0708fae5
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 208
+# Modified IV
+ct = 14a62f7284124d795826cc89852e97dbe6b8a30ac56df07173878cf0136dbe386ec46327d6fc65f1
+key = 4f710eb6b5e28703becfc3dc52fa8bc1dd44a4a6d38a84b4f94e89ac32d987e7
+msg = a828cbda9b5ff0ae374f84fa01d070a5f0a17a0c462be4f1acce3497352690
+result = invalid
+
+# tcId = 209
+# RFC 3349 padding
+ct = ac1a774a5de27e4f9c356e4f62deaf8b7eeee6bcafafd895
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 000102030405060708090a0b0c0d0e0f
+result = invalid
+
+# tcId = 210
+# Invalid encryption
+ct = b3941437f55e7cbc3f88050aff703967
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0001020304050607
+result = invalid
+
+# tcId = 211
+# padding too long
+ct = 86175acf19ad0b7ac60d1fe4bb7850635e7ec6f8a314f85b6dd3d8f9349ea38d
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 212
+# padding too long
+ct = 791f088847a76731e0d56b9b2dcb28bf9f091a9725790e0a64fc8e7cb3ad50f380297a98e3b1c33e
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0000000000000000000000000000000000000000000000000000000000000000
+result = invalid
+
+# tcId = 213
+# incorrectly encoded length
+ct = 868c34495bd3d7b4e2c1861e7fcbbdb372099488dd96c9ea
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 214
+# length = 2**32-1
+ct = 4a8b4aeaa713469bfd9bf88d4072379fc858e40b24b0bebe
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 215
+# length = 2**32-1
+ct = c210aa3b5fbf5eac97e68d98d7727f38
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0000000000000000
+result = invalid
+
+# tcId = 216
+# length = 2**31-1
+ct = e0ebd376e050cc9027b76dfc38ee2c6ae2808cecf480a560
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 217
+# length = 2**31 + 16
+ct = 23a693e211c08ab9b222c2ede2db18f437e22917fdff8032
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 218
+# data is incorrectly padded
+ct = 003f2916fea6827e01199028d3dc4e03889113f97b1860cc242e5a0f28a0f159
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = ffffffffffffffffffffffffffffffffffffffffffffffff
+result = invalid
+
+# tcId = 219
+# data is incorrectly padded
+ct = 5c25a170d5225a6d66e117c691b37383
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0001020304050607
+result = invalid
+
+# tcId = 220
+# length = 0
+ct = df9ef924eb59634be5b27cabd33d72bd6be6e01e4672ab05
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 00000000000000000000000000000000
+result = invalid
+
+# tcId = 221
+# RFC 3349 padding with incorrect size
+ct = e6e66fad359a7b63a977788acd297121
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0001020304050607
+result = invalid
+
+# tcId = 222
+# length = 9
+ct = 76b88ecda760b1af80703036185fc476
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0000000000000000
+result = invalid
+
+# tcId = 223
+# length = 16
+ct = fd101943f4ab7c38ec68c75d4b3193dc
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0000000000000000
+result = invalid
+
+# tcId = 224
+# length = 2**31 + 8
+ct = 1793a3a9bd146726edbcb9589f20e849
+key = 48a53c11ef2d727db7eb9a834b134ea9602273aca929702eb2c31d96a58c9be2
+msg = 0000000000000000
+result = invalid
+