diff options
Diffstat (limited to 'src/crypto/fipsmodule/ec/p224-64.c')
-rw-r--r-- | src/crypto/fipsmodule/ec/p224-64.c | 318 |
1 files changed, 190 insertions, 128 deletions
diff --git a/src/crypto/fipsmodule/ec/p224-64.c b/src/crypto/fipsmodule/ec/p224-64.c index 27496861..f8af39be 100644 --- a/src/crypto/fipsmodule/ec/p224-64.c +++ b/src/crypto/fipsmodule/ec/p224-64.c @@ -758,7 +758,9 @@ static void p224_point_add(p224_felem x3, p224_felem y3, p224_felem z3, z1_is_zero = p224_felem_is_zero(z1); z2_is_zero = p224_felem_is_zero(z2); // In affine coordinates, (X_1, Y_1) == (X_2, Y_2) - if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) { + p224_limb is_nontrivial_double = + x_equal & y_equal & (1 - z1_is_zero) & (1 - z2_is_zero); + if (is_nontrivial_double) { p224_point_double(x3, y3, z3, x1, y1, z1); return; } @@ -871,95 +873,6 @@ static char p224_get_bit(const p224_felem_bytearray in, size_t i) { return (in[i >> 3] >> (i & 7)) & 1; } -// Interleaved point multiplication using precomputed point multiples: -// The small point multiples 0*P, 1*P, ..., 16*P are in p_pre_comp, the scalars -// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple -// of the generator, using certain (large) precomputed multiples in -// g_p224_pre_comp. Output point (X, Y, Z) is stored in x_out, y_out, z_out -static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out, - const uint8_t *p_scalar, const uint8_t *g_scalar, - const p224_felem p_pre_comp[17][3]) { - p224_felem nq[3], tmp[4]; - uint64_t bits; - uint8_t sign, digit; - - // set nq to the point at infinity - OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem)); - - // Loop over both scalars msb-to-lsb, interleaving additions of multiples of - // the generator (two in each of the last 28 rounds) and additions of p (every - // 5th round). - int skip = 1; // save two point operations in the first round - size_t i = p_scalar != NULL ? 220 : 27; - for (;;) { - // double - if (!skip) { - p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); - } - - // add multiples of the generator - if (g_scalar != NULL && i <= 27) { - // first, look 28 bits upwards - bits = p224_get_bit(g_scalar, i + 196) << 3; - bits |= p224_get_bit(g_scalar, i + 140) << 2; - bits |= p224_get_bit(g_scalar, i + 84) << 1; - bits |= p224_get_bit(g_scalar, i + 28); - // select the point to add, in constant time - p224_select_point(bits, 16, g_p224_pre_comp[1], tmp); - - if (!skip) { - p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem)); - skip = 0; - } - - // second, look at the current position - bits = p224_get_bit(g_scalar, i + 168) << 3; - bits |= p224_get_bit(g_scalar, i + 112) << 2; - bits |= p224_get_bit(g_scalar, i + 56) << 1; - bits |= p224_get_bit(g_scalar, i); - // select the point to add, in constant time - p224_select_point(bits, 16, g_p224_pre_comp[0], tmp); - p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } - - // do other additions every 5 doublings - if (p_scalar != NULL && i % 5 == 0) { - bits = p224_get_bit(p_scalar, i + 4) << 5; - bits |= p224_get_bit(p_scalar, i + 3) << 4; - bits |= p224_get_bit(p_scalar, i + 2) << 3; - bits |= p224_get_bit(p_scalar, i + 1) << 2; - bits |= p224_get_bit(p_scalar, i) << 1; - bits |= p224_get_bit(p_scalar, i - 1); - ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); - - // select the point to add or subtract - p224_select_point(digit, 17, p_pre_comp, tmp); - p224_felem_neg(tmp[3], tmp[1]); // (X, -Y, Z) is the negative point - p224_copy_conditional(tmp[1], tmp[3], sign); - - if (!skip) { - p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem)); - skip = 0; - } - } - - if (i == 0) { - break; - } - --i; - } - p224_felem_assign(x_out, nq[0]); - p224_felem_assign(y_out, nq[1]); - p224_felem_assign(z_out, nq[2]); -} - // Takes the Jacobian coordinates (X, Y, Z) of a point and returns // (X', Y') = (X/Z^2, Y/Z^3) static int ec_GFp_nistp224_point_get_affine_coordinates( @@ -1027,49 +940,197 @@ static void ec_GFp_nistp224_dbl(const EC_GROUP *group, EC_RAW_POINT *r, p224_felem_to_generic(&r->Z, z); } -static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar) { +static void ec_GFp_nistp224_make_precomp(p224_felem out[17][3], + const EC_RAW_POINT *p) { + OPENSSL_memset(out[0], 0, sizeof(p224_felem) * 3); + + p224_generic_to_felem(out[1][0], &p->X); + p224_generic_to_felem(out[1][1], &p->Y); + p224_generic_to_felem(out[1][2], &p->Z); + + for (size_t j = 2; j <= 16; ++j) { + if (j & 1) { + p224_point_add(out[j][0], out[j][1], out[j][2], out[1][0], out[1][1], + out[1][2], 0, out[j - 1][0], out[j - 1][1], out[j - 1][2]); + } else { + p224_point_double(out[j][0], out[j][1], out[j][2], out[j / 2][0], + out[j / 2][1], out[j / 2][2]); + } + } +} + +static void ec_GFp_nistp224_point_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { p224_felem p_pre_comp[17][3]; - p224_felem x_out, y_out, z_out; - - if (p != NULL && p_scalar != NULL) { - // We treat NULL scalars as 0, and NULL points as points at infinity, i.e., - // they contribute nothing to the linear combination. - OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); - // precompute multiples - p224_generic_to_felem(x_out, &p->X); - p224_generic_to_felem(y_out, &p->Y); - p224_generic_to_felem(z_out, &p->Z); - - p224_felem_assign(p_pre_comp[1][0], x_out); - p224_felem_assign(p_pre_comp[1][1], y_out); - p224_felem_assign(p_pre_comp[1][2], z_out); - - for (size_t j = 2; j <= 16; ++j) { - if (j & 1) { - p224_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], - p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0, - p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], - p_pre_comp[j - 1][2]); + ec_GFp_nistp224_make_precomp(p_pre_comp, p); + + // Set nq to the point at infinity. + p224_felem nq[3], tmp[4]; + OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem)); + + int skip = 1; // Save two point operations in the first round. + for (size_t i = 220; i < 221; i--) { + if (!skip) { + p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // Add every 5 doublings. + if (i % 5 == 0) { + uint64_t bits = p224_get_bit(scalar->bytes, i + 4) << 5; + bits |= p224_get_bit(scalar->bytes, i + 3) << 4; + bits |= p224_get_bit(scalar->bytes, i + 2) << 3; + bits |= p224_get_bit(scalar->bytes, i + 1) << 2; + bits |= p224_get_bit(scalar->bytes, i) << 1; + bits |= p224_get_bit(scalar->bytes, i - 1); + uint8_t sign, digit; + ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); + + // Select the point to add or subtract. + p224_select_point(digit, 17, (const p224_felem(*)[3])p_pre_comp, tmp); + p224_felem_neg(tmp[3], tmp[1]); // (X, -Y, Z) is the negative point + p224_copy_conditional(tmp[1], tmp[3], sign); + + if (!skip) { + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, + tmp[0], tmp[1], tmp[2]); } else { - p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], - p_pre_comp[j / 2][0], p_pre_comp[j / 2][1], - p_pre_comp[j / 2][2]); + OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem)); + skip = 0; } } } - p224_batch_mul(x_out, y_out, z_out, - (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL, - g_scalar != NULL ? g_scalar->bytes : NULL, - (const p224_felem(*)[3])p_pre_comp); + // Reduce the output to its unique minimal representation. + p224_felem_to_generic(&r->X, nq[0]); + p224_felem_to_generic(&r->Y, nq[1]); + p224_felem_to_generic(&r->Z, nq[2]); +} + +static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group, + EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + // Set nq to the point at infinity. + p224_felem nq[3], tmp[3]; + OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem)); + + int skip = 1; // Save two point operations in the first round. + for (size_t i = 27; i < 28; i--) { + // double + if (!skip) { + p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // First, look 28 bits upwards. + uint64_t bits = p224_get_bit(scalar->bytes, i + 196) << 3; + bits |= p224_get_bit(scalar->bytes, i + 140) << 2; + bits |= p224_get_bit(scalar->bytes, i + 84) << 1; + bits |= p224_get_bit(scalar->bytes, i + 28); + // Select the point to add, in constant time. + p224_select_point(bits, 16, g_p224_pre_comp[1], tmp); + + if (!skip) { + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, + tmp[0], tmp[1], tmp[2]); + } else { + OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem)); + skip = 0; + } + + // Second, look at the current position/ + bits = p224_get_bit(scalar->bytes, i + 168) << 3; + bits |= p224_get_bit(scalar->bytes, i + 112) << 2; + bits |= p224_get_bit(scalar->bytes, i + 56) << 1; + bits |= p224_get_bit(scalar->bytes, i); + // Select the point to add, in constant time. + p224_select_point(bits, 16, g_p224_pre_comp[0], tmp); + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, + tmp[0], tmp[1], tmp[2]); + } + + // Reduce the output to its unique minimal representation. + p224_felem_to_generic(&r->X, nq[0]); + p224_felem_to_generic(&r->Y, nq[1]); + p224_felem_to_generic(&r->Z, nq[2]); +} + +static void ec_GFp_nistp224_point_mul_public(const EC_GROUP *group, + EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, + const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar) { + // TODO(davidben): If P-224 ECDSA verify performance ever matters, using + // |ec_compute_wNAF| for |p_scalar| would likely be an easy improvement. + p224_felem p_pre_comp[17][3]; + ec_GFp_nistp224_make_precomp(p_pre_comp, p); + + // Set nq to the point at infinity. + p224_felem nq[3], tmp[3]; + OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem)); + + // Loop over both scalars msb-to-lsb, interleaving additions of multiples of + // the generator (two in each of the last 28 rounds) and additions of p (every + // 5th round). + int skip = 1; // Save two point operations in the first round. + for (size_t i = 220; i < 221; i--) { + if (!skip) { + p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // Add multiples of the generator. + if (i <= 27) { + // First, look 28 bits upwards. + uint64_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3; + bits |= p224_get_bit(g_scalar->bytes, i + 140) << 2; + bits |= p224_get_bit(g_scalar->bytes, i + 84) << 1; + bits |= p224_get_bit(g_scalar->bytes, i + 28); + + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, + g_p224_pre_comp[1][bits][0], g_p224_pre_comp[1][bits][1], + g_p224_pre_comp[1][bits][2]); + assert(!skip); + + // Second, look at the current position. + bits = p224_get_bit(g_scalar->bytes, i + 168) << 3; + bits |= p224_get_bit(g_scalar->bytes, i + 112) << 2; + bits |= p224_get_bit(g_scalar->bytes, i + 56) << 1; + bits |= p224_get_bit(g_scalar->bytes, i); + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, + g_p224_pre_comp[0][bits][0], g_p224_pre_comp[0][bits][1], + g_p224_pre_comp[0][bits][2]); + } + + // Incorporate |p_scalar| every 5 doublings. + if (i % 5 == 0) { + uint64_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5; + bits |= p224_get_bit(p_scalar->bytes, i + 3) << 4; + bits |= p224_get_bit(p_scalar->bytes, i + 2) << 3; + bits |= p224_get_bit(p_scalar->bytes, i + 1) << 2; + bits |= p224_get_bit(p_scalar->bytes, i) << 1; + bits |= p224_get_bit(p_scalar->bytes, i - 1); + uint8_t sign, digit; + ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); + + // Select the point to add or subtract. + OPENSSL_memcpy(tmp, p_pre_comp[digit], 3 * sizeof(p224_felem)); + if (sign) { + p224_felem_neg(tmp[1], tmp[1]); // (X, -Y, Z) is the negative point + } + + if (!skip) { + p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, + tmp[0], tmp[1], tmp[2]); + } else { + OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem)); + skip = 0; + } + } + } - // reduce the output to its unique minimal representation - p224_felem_to_generic(&r->X, x_out); - p224_felem_to_generic(&r->Y, y_out); - p224_felem_to_generic(&r->Z, z_out); + // Reduce the output to its unique minimal representation. + p224_felem_to_generic(&r->X, nq[0]); + p224_felem_to_generic(&r->Y, nq[1]); + p224_felem_to_generic(&r->Z, nq[2]); } static void ec_GFp_nistp224_felem_mul(const EC_GROUP *group, EC_FELEM *r, @@ -1111,8 +1172,9 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) { ec_GFp_nistp224_point_get_affine_coordinates; out->add = ec_GFp_nistp224_add; out->dbl = ec_GFp_nistp224_dbl; - out->mul = ec_GFp_nistp224_points_mul; - out->mul_public = ec_GFp_nistp224_points_mul; + out->mul = ec_GFp_nistp224_point_mul; + out->mul_base = ec_GFp_nistp224_point_mul_base; + out->mul_public = ec_GFp_nistp224_point_mul_public; out->felem_mul = ec_GFp_nistp224_felem_mul; out->felem_sqr = ec_GFp_nistp224_felem_sqr; out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem; |