external/boringssl: Sync to fa3aadcd40ec4fd27a6e9492ef099b3dcc6eb2af.android-o-mr1-iot-release-smart-display-r9 android-o-mr1-iot-release-smart-display-r8 android-o-mr1-iot-release-smart-display-r5 android-o-mr1-iot-release-smart-display-r40.1J android-o-mr1-iot-release-smart-display-r4 android-o-mr1-iot-release-smart-display-r39 android-o-mr1-iot-release-smart-display-r30 android-o-mr1-iot-release-smart-display-r22 android-o-mr1-iot-release-smart-display-r14 android-o-mr1-iot-release-smart-clock-r6 android-o-mr1-iot-release-smart-clock-r2 android-o-mr1-iot-release-smart-clock-fsi android-o-mr1-iot-release-smart-clock-fcs android-o-mr1-iot-release-cube_r2 android-o-mr1-iot-release-cube-fsi android-o-mr1-iot-release-cube-fcs

This includes the following changes: https://boringssl.googlesource.com/boringssl/+log/7f7e5e231efec6e86d6c7d3fd1b759be1cece156..fa3aadcd40ec4fd27a6e9492ef099b3dcc6eb2af Test: BoringSSL CTS Presubmits. Change-Id: I5381241ee7b94e1076d04090a0bc468b7816a1a1
author: Robert Sloan <varomodt@google.com> 2018-11-12 13:38:50 -0800
committer: Robert Sloan <varomodt@google.com> 2018-11-12 13:51:26 -0800
commit: a51059f202525842fc0d628a408ad5a5e33a54e7 (patch)
tree: 5b28ee9547e63d78299aaedf791cc293a8518f07 /src
parent: cbf5ea62f9677735fb503a0a23ab3ee8c15ef40e (diff)
download: boringssl-a51059f202525842fc0d628a408ad5a5e33a54e7.tar.gz
27 files changed, 1758 insertions, 480 deletions
diff --git a/src/crypto/fipsmodule/CMakeLists.txt b/src/crypto/fipsmodule/CMakeLists.txt
index e6c8cc69..463febbf 100644
--- a/src/crypto/fipsmodule/CMakeLists.txt
+++ b/src/crypto/fipsmodule/CMakeLists.txt
@@ -11,6 +11,7 @@ if(${ARCH} STREQUAL "x86_64")
     ghash-x86_64.${ASM_EXT}
     md5-x86_64.${ASM_EXT}
     p256-x86_64-asm.${ASM_EXT}
+    p256_beeu-x86_64-asm.${ASM_EXT}
     rdrand-x86_64.${ASM_EXT}
     rsaz-avx2.${ASM_EXT}
     sha1-x86_64.${ASM_EXT}
@@ -100,6 +101,7 @@ perlasm(ghash-x86.${ASM_EXT} modes/asm/ghash-x86.pl)
 perlasm(md5-586.${ASM_EXT} md5/asm/md5-586.pl)
 perlasm(md5-x86_64.${ASM_EXT} md5/asm/md5-x86_64.pl)
 perlasm(p256-x86_64-asm.${ASM_EXT} ec/asm/p256-x86_64-asm.pl)
+perlasm(p256_beeu-x86_64-asm.${ASM_EXT} ec/asm/p256_beeu-x86_64-asm.pl)
 perlasm(rdrand-x86_64.${ASM_EXT} rand/asm/rdrand-x86_64.pl)
 perlasm(rsaz-avx2.${ASM_EXT} bn/asm/rsaz-avx2.pl)
 perlasm(sha1-586.${ASM_EXT} sha/asm/sha1-586.pl)
diff --git a/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
new file mode 100644
index 00000000..12b9f5af
--- /dev/null
+++ b/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
@@ -0,0 +1,405 @@
+# Copyright (c) 2018, Amazon Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+#
+# Written by Nir Drucker, and Shay Gueron
+# AWS Cryptographic Algorithms Group
+# (ndrucker@amazon.com, gueron@amazon.com)
+# based on BN_mod_inverse_odd
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
+
+#############################################################################
+# extern int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
+#                                     BN_ULONG a[P256_LIMBS],
+#                                     BN_ULONG n[P256_LIMBS]);
+#
+# (Binary Extended Euclidean Algorithm.
+#  See https://en.wikipedia.org/wiki/Binary_GCD_algorithm)
+#
+# Assumption 1: n is odd for the BEEU
+# Assumption 2: 1 < a < n < 2^256
+
+$out = "%rdi";
+$a = "%rsi";
+$n = "%rdx";
+
+# X/Y will hold the inverse parameter
+# Assumption: X,Y<2^(256)
+$x0 = "%r8";
+$x1 = "%r9";
+$x2 = "%r10";
+$x3 = "%r11";
+# borrow from out (out is needed only at the end)
+$x4 = "%rdi";
+$y0 = "%r12";
+$y1 = "%r13";
+$y2 = "%r14";
+$y3 = "%r15";
+$y4 = "%rbp";
+$shift = "%rcx";
+$t0 = "%rax";
+$t1 = "%rbx";
+$t2 = "%rsi";
+# borrow
+$t3 = "%rcx";
+
+$T0 = "%xmm0";
+$T1 = "%xmm1";
+
+# Offsets on the stack
+$out_rsp = 0;
+$shift_rsp = $out_rsp+0x8;
+$a_rsp0 = $shift_rsp+0x8;
+$a_rsp1 = $a_rsp0+0x8;
+$a_rsp2 = $a_rsp1+0x8;
+$a_rsp3 = $a_rsp2+0x8;
+$b_rsp0 = $a_rsp3+0x8;
+$b_rsp1 = $b_rsp0+0x8;
+$b_rsp2 = $b_rsp1+0x8;
+$b_rsp3 = $b_rsp2+0x8;
+
+# Borrow when a_rsp/b_rsp are no longer needed.
+$y_rsp0 = $a_rsp0;
+$y_rsp1 = $y_rsp0+0x8;
+$y_rsp2 = $y_rsp1+0x8;
+$y_rsp3 = $y_rsp2+0x8;
+$y_rsp4 = $y_rsp3+0x8;
+$last_rsp_offset = $b_rsp3+0x8;
+
+sub TEST_B_ZERO {
+  return <<___;
+    xorq $t1, $t1
+    or $b_rsp0(%rsp), $t1
+    or $b_rsp1(%rsp), $t1
+    or $b_rsp2(%rsp), $t1
+    or $b_rsp3(%rsp), $t1
+    jz .Lbeeu_loop_end
+___
+}
+
+$g_next_label = 0;
+
+sub SHIFT1 {
+  my ($var0, $var1, $var2, $var3, $var4) = @_;
+  my $label = ".Lshift1_${g_next_label}";
+  $g_next_label++;
+
+  return <<___;
+    # Ensure X is even and divide by two.
+    movq \$1, $t1
+    andq $var0, $t1
+    jz $label
+    add 0*8($n), $var0
+    adc 1*8($n), $var1
+    adc 2*8($n), $var2
+    adc 3*8($n), $var3
+    adc \$0, $var4
+
+$label:
+    shrdq \$1, $var1, $var0
+    shrdq \$1, $var2, $var1
+    shrdq \$1, $var3, $var2
+    shrdq \$1, $var4, $var3
+    shrq  \$1, $var4
+___
+}
+
+sub SHIFT256 {
+  my ($var) = @_;
+  return <<___;
+    # Copy shifted values.
+    # Remember not to override t3=rcx
+    movq 1*8+$var(%rsp), $t0
+    movq 2*8+$var(%rsp), $t1
+    movq 3*8+$var(%rsp), $t2
+
+    shrdq %cl, $t0, 0*8+$var(%rsp)
+    shrdq %cl, $t1, 1*8+$var(%rsp)
+    shrdq %cl, $t2, 2*8+$var(%rsp)
+
+    shrq  %cl, $t2
+    mov $t2, 3*8+$var(%rsp)
+___
+}
+
+$code.=<<___;
+.text
+
+.type beeu_mod_inverse_vartime,\@function
+.hidden beeu_mod_inverse_vartime
+.globl  beeu_mod_inverse_vartime
+.align 32
+beeu_mod_inverse_vartime:
+.cfi_startproc
+    push %rbp
+.cfi_push rbp
+    movq %rsp, %rbp
+.cfi_def_cfa_register rbp
+
+    push %r12
+.cfi_push r12
+    push %r13
+.cfi_push r13
+    push %r14
+.cfi_push r14
+    push %r15
+.cfi_push r15
+    push %rbx
+.cfi_push rbx
+    push %rsi
+.cfi_push rsi
+
+    sub \$$last_rsp_offset, %rsp
+    movq $out, $out_rsp(%rsp)
+
+    # X=1, Y=0
+    movq \$1, $x0
+    xorq $x1, $x1
+    xorq $x2, $x2
+    xorq $x3, $x3
+    xorq $x4, $x4
+
+    xorq $y0, $y0
+    xorq $y1, $y1
+    xorq $y2, $y2
+    xorq $y3, $y3
+    xorq $y4, $y4
+
+    # Copy a/n into B/A on the stack.
+    vmovdqu 0*8($a), $T0
+    vmovdqu 2*8($a), $T1
+    vmovdqu $T0, $b_rsp0(%rsp)
+    vmovdqu $T1, $b_rsp2(%rsp)
+
+    vmovdqu 0*8($n), $T0
+    vmovdqu 2*8($n), $T1
+    vmovdqu $T0, $a_rsp0(%rsp)
+    vmovdqu $T1, $a_rsp2(%rsp)
+
+.Lbeeu_loop:
+    ${\TEST_B_ZERO}
+
+    # 0 < B < |n|,
+    # 0 < A <= |n|,
+    # (1)      X*a  ==  B   (mod |n|),
+    # (2) (-1)*Y*a  ==  A   (mod |n|)
+
+    # Now divide B by the maximum possible power of two in the
+    # integers, and divide X by the same value mod |n|. When we're
+    # done, (1) still holds.
+    movq \$1, $shift
+
+    # Note that B > 0
+.Lbeeu_shift_loop_XB:
+    movq $shift, $t1
+    andq $b_rsp0(%rsp), $t1
+    jnz .Lbeeu_shift_loop_end_XB
+
+    ${\SHIFT1($x0, $x1, $x2, $x3, $x4)}
+    shl \$1, $shift
+
+    # Test wraparound of the shift parameter. The probability to have 32 zeroes
+    # in a row is small Therefore having the value below equal \$0x8000000 or
+    # \$0x8000 does not affect the performance. We choose 0x8000000 because it
+    # is the maximal immediate value possible.
+    cmp \$0x8000000, $shift
+    jne .Lbeeu_shift_loop_XB
+
+.Lbeeu_shift_loop_end_XB:
+    bsf $shift, $shift
+    test $shift, $shift
+    jz .Lbeeu_no_shift_XB
+
+    ${\SHIFT256($b_rsp0)}
+
+.Lbeeu_no_shift_XB:
+    # Same for A and Y.  Afterwards, (2) still holds.
+    movq \$1, $shift
+
+    # Note that A > 0
+.Lbeeu_shift_loop_YA:
+    movq $shift, $t1
+    andq $a_rsp0(%rsp), $t1
+    jnz .Lbeeu_shift_loop_end_YA
+
+    ${\SHIFT1($y0, $y1, $y2, $y3, $y4)}
+    shl \$1, $shift
+
+    # Test wraparound of the shift parameter. The probability to have 32 zeroes
+    # in a row is small therefore having the value below equal \$0x8000000 or
+    # \$0x8000 Does not affect the performance. We choose 0x8000000 because it
+    # is the maximal immediate value possible.
+    cmp \$0x8000000, $shift
+    jne .Lbeeu_shift_loop_YA
+
+.Lbeeu_shift_loop_end_YA:
+    bsf $shift, $shift
+    test $shift, $shift
+    jz .Lbeeu_no_shift_YA
+
+    ${\SHIFT256($a_rsp0)}
+
+.Lbeeu_no_shift_YA:
+    # T = B-A (A,B < 2^256)
+    mov $b_rsp0(%rsp), $t0
+    mov $b_rsp1(%rsp), $t1
+    mov $b_rsp2(%rsp), $t2
+    mov $b_rsp3(%rsp), $t3
+    sub $a_rsp0(%rsp), $t0
+    sbb $a_rsp1(%rsp), $t1
+    sbb $a_rsp2(%rsp), $t2
+    sbb $a_rsp3(%rsp), $t3  # borrow from shift
+    jnc .Lbeeu_B_bigger_than_A
+
+    # A = A - B
+    mov $a_rsp0(%rsp), $t0
+    mov $a_rsp1(%rsp), $t1
+    mov $a_rsp2(%rsp), $t2
+    mov $a_rsp3(%rsp), $t3
+    sub $b_rsp0(%rsp), $t0
+    sbb $b_rsp1(%rsp), $t1
+    sbb $b_rsp2(%rsp), $t2
+    sbb $b_rsp3(%rsp), $t3
+    mov $t0, $a_rsp0(%rsp)
+    mov $t1, $a_rsp1(%rsp)
+    mov $t2, $a_rsp2(%rsp)
+    mov $t3, $a_rsp3(%rsp)
+
+    # Y = Y + X
+    add $x0, $y0
+    adc $x1, $y1
+    adc $x2, $y2
+    adc $x3, $y3
+    adc $x4, $y4
+    jmp .Lbeeu_loop
+
+.Lbeeu_B_bigger_than_A:
+    # B = T = B - A
+    mov $t0, $b_rsp0(%rsp)
+    mov $t1, $b_rsp1(%rsp)
+    mov $t2, $b_rsp2(%rsp)
+    mov $t3, $b_rsp3(%rsp)
+
+    # X = Y + X
+    add $y0, $x0
+    adc $y1, $x1
+    adc $y2, $x2
+    adc $y3, $x3
+    adc $y4, $x4
+
+    jmp .Lbeeu_loop
+
+.Lbeeu_loop_end:
+    # The Euclid's algorithm loop ends when A == beeu(a,n);
+    # Therefore (-1)*Y*a == A (mod |n|), Y>0
+
+    # Verify that A = 1 ==> (-1)*Y*a = A = 1  (mod |n|)
+    mov $a_rsp0(%rsp), $t1
+    sub \$1, $t1
+    or $a_rsp1(%rsp), $t1
+    or $a_rsp2(%rsp), $t1
+    or $a_rsp3(%rsp), $t1
+    # If not, fail.
+    jnz .Lbeeu_err
+
+    # From this point on, we no longer need X
+    # Therefore we use it as a temporary storage.
+    # X = n
+    movq 0*8($n), $x0
+    movq 1*8($n), $x1
+    movq 2*8($n), $x2
+    movq 3*8($n), $x3
+    xorq $x4, $x4
+
+.Lbeeu_reduction_loop:
+    movq $y0, $y_rsp0(%rsp)
+    movq $y1, $y_rsp1(%rsp)
+    movq $y2, $y_rsp2(%rsp)
+    movq $y3, $y_rsp3(%rsp)
+    movq $y4, $y_rsp4(%rsp)
+
+    # If Y>n ==> Y=Y-n
+    sub $x0, $y0
+    sbb $x1, $y1
+    sbb $x2, $y2
+    sbb $x3, $y3
+    sbb \$0, $y4
+
+    # Choose old Y or new Y
+    cmovc $y_rsp0(%rsp), $y0
+    cmovc $y_rsp1(%rsp), $y1
+    cmovc $y_rsp2(%rsp), $y2
+    cmovc $y_rsp3(%rsp), $y3
+    jnc .Lbeeu_reduction_loop
+
+    # X = n - Y (n, Y < 2^256), (Cancel the (-1))
+    sub $y0, $x0
+    sbb $y1, $x1
+    sbb $y2, $x2
+    sbb $y3, $x3
+
+.Lbeeu_save:
+    # Save the inverse(<2^256) to out.
+    mov $out_rsp(%rsp), $out
+
+    movq $x0, 0*8($out)
+    movq $x1, 1*8($out)
+    movq $x2, 2*8($out)
+    movq $x3, 3*8($out)
+
+    # Return 1.
+    movq \$1, %rax
+    jmp .Lbeeu_finish
+
+.Lbeeu_err:
+    # Return 0.
+    xorq %rax, %rax
+
+.Lbeeu_finish:
+    add \$$last_rsp_offset, %rsp
+    pop %rsi
+.cfi_pop rsi
+    pop %rbx
+.cfi_pop rbx
+    pop %r15
+.cfi_pop r15
+    pop %r14
+.cfi_pop r14
+    pop %r13
+.cfi_pop r13
+    pop %r12
+.cfi_pop r12
+    pop %rbp
+.cfi_pop rbp
+.cfi_def_cfa rsp, 8
+.cfi_endproc
+    ret
+
+.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
+___
+
+print $code;
+close STDOUT;
diff --git a/src/crypto/fipsmodule/ec/ec.c b/src/crypto/fipsmodule/ec/ec.c
index 908e35e9..ba101fea 100644
--- a/src/crypto/fipsmodule/ec/ec.c
+++ b/src/crypto/fipsmodule/ec/ec.c
@@ -619,7 +619,7 @@ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *out_p, BIGNUM *out_a,
 int EC_GROUP_get_curve_name(const EC_GROUP *group) { return group->curve_name; }
 
 unsigned EC_GROUP_get_degree(const EC_GROUP *group) {
-  return ec_GFp_simple_group_get_degree(group);
+  return BN_num_bits(&group->field);
 }
 
 const char *EC_curve_nid2nist(int nid) {
@@ -743,7 +743,15 @@ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
     OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
     return 0;
   }
-  return group->meth->point_get_affine_coordinates(group, &point->raw, x, y);
+  EC_FELEM x_felem, y_felem;
+  if (!group->meth->point_get_affine_coordinates(group, &point->raw,
+                                                 x == NULL ? NULL : &x_felem,
+                                                 y == NULL ? NULL : &y_felem) ||
+      (x != NULL && !bn_set_words(x, x_felem.words, group->field.width)) ||
+      (y != NULL && !bn_set_words(y, y_felem.words, group->field.width))) {
+    return 0;
+  }
+  return 1;
 }
 
 int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *point,
@@ -782,7 +790,7 @@ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
     OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
     return 0;
   }
-  ec_GFp_simple_add(group, &r->raw, &a->raw, &b->raw);
+  group->meth->add(group, &r->raw, &a->raw, &b->raw);
   return 1;
 }
 
@@ -793,7 +801,7 @@ int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
     OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
     return 0;
   }
-  ec_GFp_simple_dbl(group, &r->raw, &a->raw);
+  group->meth->dbl(group, &r->raw, &a->raw);
   return 1;
 }
 
@@ -911,6 +919,43 @@ int ec_point_mul_scalar(const EC_GROUP *group, EC_POINT *r,
   return 1;
 }
 
+int ec_cmp_x_coordinate(int *out_result, const EC_GROUP *group,
+                        const EC_POINT *p, const BIGNUM *r, BN_CTX *ctx) {
+  return group->meth->cmp_x_coordinate(out_result, group, p, r, ctx);
+}
+
+int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
+  // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
+  // can reduce by performing at most one subtraction.
+  //
+  // Proof: We only work with prime order curves, so the number of points on
+  // the curve is the order. Thus Hasse's theorem gives:
+  //
+  //     |order - (p + 1)| <= 2×sqrt(p)
+  //         p + 1 - order <= 2×sqrt(p)
+  //     p + 1 - 2×sqrt(p) <= order
+  //       p + 1 - 2×(p/4)  < order       (p/4 > sqrt(p) for p >= 17)
+  //         p/2 < p/2 + 1  < order
+  //                     p  < 2×order
+  //
+  // Additionally, one can manually check this property for built-in curves. It
+  // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
+  //
+  // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
+  // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
+  // to be constant-time for signing? |r| is the x-coordinate for kG, which is
+  // public unless k was rerolled because |s| was zero.
+  assert(!BN_is_negative(r));
+  assert(BN_cmp(r, &group->field) < 0);
+  if (BN_cmp(r, &group->order) >= 0 &&
+      !BN_sub(r, r, &group->order)) {
+    return 0;
+  }
+  assert(!BN_is_negative(r));
+  assert(BN_cmp(r, &group->order) < 0);
+  return 1;
+}
+
 void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag) {}
 
 const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group) {
diff --git a/src/crypto/fipsmodule/ec/ec_montgomery.c b/src/crypto/fipsmodule/ec/ec_montgomery.c
index 9eace95f..4961a7c8 100644
--- a/src/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/src/crypto/fipsmodule/ec/ec_montgomery.c
@@ -182,7 +182,7 @@ int ec_GFp_mont_felem_to_bignum(const EC_GROUP *group, BIGNUM *out,
 
 static int ec_GFp_mont_point_get_affine_coordinates(const EC_GROUP *group,
                                                     const EC_RAW_POINT *point,
-                                                    BIGNUM *x, BIGNUM *y) {
+                                                    EC_FELEM *x, EC_FELEM *y) {
   if (ec_GFp_simple_is_at_infinity(group, point)) {
     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
     return 0;
@@ -201,35 +201,236 @@ static int ec_GFp_mont_point_get_affine_coordinates(const EC_GROUP *group,
   ec_GFp_mont_felem_from_montgomery(group, &z1, &z1);
 
   if (x != NULL) {
-    EC_FELEM tmp;
-    ec_GFp_mont_felem_mul(group, &tmp, &point->X, &z1);
-    if (!bn_set_words(x, tmp.words, group->field.width)) {
-      return 0;
-    }
+    ec_GFp_mont_felem_mul(group, x, &point->X, &z1);
   }
 
   if (y != NULL) {
-    EC_FELEM tmp;
     ec_GFp_mont_felem_mul(group, &z1, &z1, &z2);
-    ec_GFp_mont_felem_mul(group, &tmp, &point->Y, &z1);
-    if (!bn_set_words(y, tmp.words, group->field.width)) {
-      return 0;
-    }
+    ec_GFp_mont_felem_mul(group, y, &point->Y, &z1);
   }
 
   return 1;
 }
 
+void ec_GFp_mont_add(const EC_GROUP *group, EC_RAW_POINT *out,
+                     const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+  if (a == b) {
+    ec_GFp_mont_dbl(group, out, a);
+    return;
+  }
+
+  // The method is taken from:
+  //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
+  //
+  // Coq transcription and correctness proof:
+  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
+  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
+  EC_FELEM x_out, y_out, z_out;
+  BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
+  BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);
+
+  // z1z1 = z1z1 = z1**2
+  EC_FELEM z1z1;
+  ec_GFp_mont_felem_sqr(group, &z1z1, &a->Z);
+
+  // z2z2 = z2**2
+  EC_FELEM z2z2;
+  ec_GFp_mont_felem_sqr(group, &z2z2, &b->Z);
+
+  // u1 = x1*z2z2
+  EC_FELEM u1;
+  ec_GFp_mont_felem_mul(group, &u1, &a->X, &z2z2);
+
+  // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
+  EC_FELEM two_z1z2;
+  ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);
+  ec_GFp_mont_felem_sqr(group, &two_z1z2, &two_z1z2);
+  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
+  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);
+
+  // s1 = y1 * z2**3
+  EC_FELEM s1;
+  ec_GFp_mont_felem_mul(group, &s1, &b->Z, &z2z2);
+  ec_GFp_mont_felem_mul(group, &s1, &s1, &a->Y);
+
+  // u2 = x2*z1z1
+  EC_FELEM u2;
+  ec_GFp_mont_felem_mul(group, &u2, &b->X, &z1z1);
+
+  // h = u2 - u1
+  EC_FELEM h;
+  ec_felem_sub(group, &h, &u2, &u1);
+
+  BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);
+
+  // z_out = two_z1z2 * h
+  ec_GFp_mont_felem_mul(group, &z_out, &h, &two_z1z2);
+
+  // z1z1z1 = z1 * z1z1
+  EC_FELEM z1z1z1;
+  ec_GFp_mont_felem_mul(group, &z1z1z1, &a->Z, &z1z1);
+
+  // s2 = y2 * z1**3
+  EC_FELEM s2;
+  ec_GFp_mont_felem_mul(group, &s2, &b->Y, &z1z1z1);
+
+  // r = (s2 - s1)*2
+  EC_FELEM r;
+  ec_felem_sub(group, &r, &s2, &s1);
+  ec_felem_add(group, &r, &r, &r);
+
+  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
+
+  // This case will never occur in the constant-time |ec_GFp_mont_mul|.
+  if (!xneq && !yneq && z1nz && z2nz) {
+    ec_GFp_mont_dbl(group, out, a);
+    return;
+  }
+
+  // I = (2h)**2
+  EC_FELEM i;
+  ec_felem_add(group, &i, &h, &h);
+  ec_GFp_mont_felem_sqr(group, &i, &i);
+
+  // J = h * I
+  EC_FELEM j;
+  ec_GFp_mont_felem_mul(group, &j, &h, &i);
+
+  // V = U1 * I
+  EC_FELEM v;
+  ec_GFp_mont_felem_mul(group, &v, &u1, &i);
+
+  // x_out = r**2 - J - 2V
+  ec_GFp_mont_felem_sqr(group, &x_out, &r);
+  ec_felem_sub(group, &x_out, &x_out, &j);
+  ec_felem_sub(group, &x_out, &x_out, &v);
+  ec_felem_sub(group, &x_out, &x_out, &v);
+
+  // y_out = r(V-x_out) - 2 * s1 * J
+  ec_felem_sub(group, &y_out, &v, &x_out);
+  ec_GFp_mont_felem_mul(group, &y_out, &y_out, &r);
+  EC_FELEM s1j;
+  ec_GFp_mont_felem_mul(group, &s1j, &s1, &j);
+  ec_felem_sub(group, &y_out, &y_out, &s1j);
+  ec_felem_sub(group, &y_out, &y_out, &s1j);
+
+  ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
+  ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
+  ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
+  ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
+  ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
+  ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
+}
+
+void ec_GFp_mont_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_RAW_POINT *a) {
+  if (group->a_is_minus3) {
+    // The method is taken from:
+    //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
+    //
+    // Coq transcription and correctness proof:
+    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
+    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
+    EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
+    // delta = z^2
+    ec_GFp_mont_felem_sqr(group, &delta, &a->Z);
+    // gamma = y^2
+    ec_GFp_mont_felem_sqr(group, &gamma, &a->Y);
+    // beta = x*gamma
+    ec_GFp_mont_felem_mul(group, &beta, &a->X, &gamma);
+
+    // alpha = 3*(x-delta)*(x+delta)
+    ec_felem_sub(group, &ftmp, &a->X, &delta);
+    ec_felem_add(group, &ftmp2, &a->X, &delta);
+
+    ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);
+    ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);
+    ec_GFp_mont_felem_mul(group, &alpha, &ftmp, &ftmp2);
+
+    // x' = alpha^2 - 8*beta
+    ec_GFp_mont_felem_sqr(group, &r->X, &alpha);
+    ec_felem_add(group, &fourbeta, &beta, &beta);
+    ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);
+    ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);
+    ec_felem_sub(group, &r->X, &r->X, &tmptmp);
+
+    // z' = (y + z)^2 - gamma - delta
+    ec_felem_add(group, &delta, &gamma, &delta);
+    ec_felem_add(group, &ftmp, &a->Y, &a->Z);
+    ec_GFp_mont_felem_sqr(group, &r->Z, &ftmp);
+    ec_felem_sub(group, &r->Z, &r->Z, &delta);
+
+    // y' = alpha*(4*beta - x') - 8*gamma^2
+    ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
+    ec_felem_add(group, &gamma, &gamma, &gamma);
+    ec_GFp_mont_felem_sqr(group, &gamma, &gamma);
+    ec_GFp_mont_felem_mul(group, &r->Y, &alpha, &r->Y);
+    ec_felem_add(group, &gamma, &gamma, &gamma);
+    ec_felem_sub(group, &r->Y, &r->Y, &gamma);
+  } else {
+    // The method is taken from:
+    //   http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
+    //
+    // Coq transcription and correctness proof:
+    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
+    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
+    EC_FELEM xx, yy, yyyy, zz;
+    ec_GFp_mont_felem_sqr(group, &xx, &a->X);
+    ec_GFp_mont_felem_sqr(group, &yy, &a->Y);
+    ec_GFp_mont_felem_sqr(group, &yyyy, &yy);
+    ec_GFp_mont_felem_sqr(group, &zz, &a->Z);
+
+    // s = 2*((x_in + yy)^2 - xx - yyyy)
+    EC_FELEM s;
+    ec_felem_add(group, &s, &a->X, &yy);
+    ec_GFp_mont_felem_sqr(group, &s, &s);
+    ec_felem_sub(group, &s, &s, &xx);
+    ec_felem_sub(group, &s, &s, &yyyy);
+    ec_felem_add(group, &s, &s, &s);
+
+    // m = 3*xx + a*zz^2
+    EC_FELEM m;
+    ec_GFp_mont_felem_sqr(group, &m, &zz);
+    ec_GFp_mont_felem_mul(group, &m, &group->a, &m);
+    ec_felem_add(group, &m, &m, &xx);
+    ec_felem_add(group, &m, &m, &xx);
+    ec_felem_add(group, &m, &m, &xx);
+
+    // x_out = m^2 - 2*s
+    ec_GFp_mont_felem_sqr(group, &r->X, &m);
+    ec_felem_sub(group, &r->X, &r->X, &s);
+    ec_felem_sub(group, &r->X, &r->X, &s);
+
+    // z_out = (y_in + z_in)^2 - yy - zz
+    ec_felem_add(group, &r->Z, &a->Y, &a->Z);
+    ec_GFp_mont_felem_sqr(group, &r->Z, &r->Z);
+    ec_felem_sub(group, &r->Z, &r->Z, &yy);
+    ec_felem_sub(group, &r->Z, &r->Z, &zz);
+
+    // y_out = m*(s-x_out) - 8*yyyy
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
+    ec_felem_sub(group, &r->Y, &s, &r->X);
+    ec_GFp_mont_felem_mul(group, &r->Y, &r->Y, &m);
+    ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
+  }
+}
+
 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
   out->group_init = ec_GFp_mont_group_init;
   out->group_finish = ec_GFp_mont_group_finish;
   out->group_set_curve = ec_GFp_mont_group_set_curve;
   out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
-  out->mul = ec_GFp_simple_mul;
-  out->mul_public = ec_GFp_simple_mul_public;
+  out->add = ec_GFp_mont_add;
+  out->dbl = ec_GFp_mont_dbl;
+  out->mul = ec_GFp_mont_mul;
+  out->mul_public = ec_GFp_mont_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
   out->felem_sqr = ec_GFp_mont_felem_sqr;
   out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
   out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
   out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+  out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+  out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
 }
diff --git a/src/crypto/fipsmodule/ec/internal.h b/src/crypto/fipsmodule/ec/internal.h
index bb172b26..4afaef94 100644
--- a/src/crypto/fipsmodule/ec/internal.h
+++ b/src/crypto/fipsmodule/ec/internal.h
@@ -124,8 +124,21 @@ struct ec_method_st {
   void (*group_finish)(EC_GROUP *);
   int (*group_set_curve)(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
                          const BIGNUM *b, BN_CTX *);
-  int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_RAW_POINT *,
-                                      BIGNUM *x, BIGNUM *y);
+
+  // point_get_affine_coordinates sets |*x| and |*y| to the affine coordinates
+  // of |p|. Either |x| or |y| may be NULL to omit it. It returns one on success
+  // and zero if |p| is the point at infinity.
+  //
+  // Note: unlike |EC_FELEM|s used as intermediate values internal to the
+  // |EC_METHOD|, |*x| and |*y| are not encoded in Montgomery form.
+  int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_RAW_POINT *p,
+                                      EC_FELEM *x, EC_FELEM *y);
+
+  // add sets |r| to |a| + |b|.
+  void (*add)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a,
+              const EC_RAW_POINT *b);
+  // dbl sets |r| to |a| + |a|.
+  void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a);
 
   // Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
   // are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
@@ -149,10 +162,9 @@ struct ec_method_st {
   // TODO(davidben): This constrains |EC_FELEM|'s internal representation, adds
   // many indirect calls in the middle of the generic code, and a bunch of
   // conversions. If p224-64.c were easily convertable to Montgomery form, we
-  // could say |EC_FELEM| is always in Montgomery form. If we exposed the
-  // internal add and double implementations in each of the curves, we could
-  // give |EC_POINT| an |EC_METHOD|-specific representation and |EC_FELEM| is
-  // purely a |EC_GFp_mont_method| type.
+  // could say |EC_FELEM| is always in Montgomery form. If we routed the rest of
+  // simple.c to |EC_METHOD|, we could give |EC_POINT| an |EC_METHOD|-specific
+  // representation and say |EC_FELEM| is purely a |EC_GFp_mont_method| type.
   void (*felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                     const EC_FELEM *b);
   void (*felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a);
@@ -162,11 +174,22 @@ struct ec_method_st {
   int (*felem_to_bignum)(const EC_GROUP *group, BIGNUM *out,
                          const EC_FELEM *in);
 
-  // scalar_inv_mont sets |out| to |in|^-1, where both input and output are in
-  // Montgomery form.
+  // scalar_inv_montgomery sets |out| to |in|^-1, where both input and output
+  // are in Montgomery form.
   void (*scalar_inv_montgomery)(const EC_GROUP *group, EC_SCALAR *out,
                                 const EC_SCALAR *in);
 
+  // scalar_inv_montgomery_vartime performs the same computation as
+  // |scalar_inv_montgomery|. It further assumes that the inputs are public so
+  // there is no concern about leaking their values through timing.
+  int (*scalar_inv_montgomery_vartime)(const EC_GROUP *group, EC_SCALAR *out,
+                                       const EC_SCALAR *in);
+
+  // cmp_x_coordinate compares the x (affine) coordinate of |p|, mod the group
+  // order, with |r|. On error it returns zero. Otherwise it sets |*out_result|
+  // to one iff the values match.
+  int (*cmp_x_coordinate)(int *out_result, const EC_GROUP *group,
+                          const EC_POINT *p, const BIGNUM *r, BN_CTX *ctx);
 } /* EC_METHOD */;
 
 const EC_METHOD *EC_GFp_mont_method(void);
@@ -272,6 +295,11 @@ void ec_scalar_mul_montgomery(const EC_GROUP *group, EC_SCALAR *r,
 void ec_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
                               const EC_SCALAR *a);
 
+// ec_scalar_inv_montgomery_vartime performs the same actions as
+// |ec_scalar_inv_montgomery|, but in variable time.
+int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
+                                     const EC_SCALAR *a);
+
 // ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| *
 // |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and
 // |p_scalar| need not be fully reduced. They need only contain as many bits as
@@ -287,9 +315,19 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public(
     const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
     const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
 
-void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                       const EC_SCALAR *p_scalar);
+// ec_cmp_x_coordinate compares the x (affine) coordinate of |p| with |r|. It
+// returns zero on error. Otherwise it sets |*out_result| to one iff the values
+// match.
+int ec_cmp_x_coordinate(int *out_result, const EC_GROUP *group,
+                        const EC_POINT *p, const BIGNUM *r, BN_CTX *ctx);
+
+// ec_field_element_to_scalar reduces |r| modulo |group->order|. |r| must
+// previously have been reduced modulo |group->field|.
+int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r);
+
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar);
 
 // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
 // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
@@ -302,9 +340,9 @@ void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
 void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
                      const EC_SCALAR *scalar, size_t bits, int w);
 
-void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
-                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                              const EC_SCALAR *p_scalar);
+void ec_GFp_mont_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+                            const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                            const EC_SCALAR *p_scalar);
 
 // method functions in simple.c
 int ec_GFp_simple_group_init(EC_GROUP *);
@@ -313,17 +351,15 @@ int ec_GFp_simple_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
                                   const BIGNUM *b, BN_CTX *);
 int ec_GFp_simple_group_get_curve(const EC_GROUP *, BIGNUM *p, BIGNUM *a,
                                   BIGNUM *b);
-unsigned ec_GFp_simple_group_get_degree(const EC_GROUP *);
 void ec_GFp_simple_point_init(EC_RAW_POINT *);
 void ec_GFp_simple_point_copy(EC_RAW_POINT *, const EC_RAW_POINT *);
 void ec_GFp_simple_point_set_to_infinity(const EC_GROUP *, EC_RAW_POINT *);
 int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *, EC_RAW_POINT *,
                                                const BIGNUM *x,
                                                const BIGNUM *y);
-void ec_GFp_simple_add(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a,
-                       const EC_RAW_POINT *b);
-void ec_GFp_simple_dbl(const EC_GROUP *, EC_RAW_POINT *r,
-                       const EC_RAW_POINT *a);
+void ec_GFp_mont_add(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a,
+                     const EC_RAW_POINT *b);
+void ec_GFp_mont_dbl(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a);
 void ec_GFp_simple_invert(const EC_GROUP *, EC_RAW_POINT *);
 int ec_GFp_simple_is_at_infinity(const EC_GROUP *, const EC_RAW_POINT *);
 int ec_GFp_simple_is_on_curve(const EC_GROUP *, const EC_RAW_POINT *);
@@ -332,6 +368,16 @@ int ec_GFp_simple_cmp(const EC_GROUP *, const EC_RAW_POINT *a,
 void ec_simple_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
                                      const EC_SCALAR *a);
 
+int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group, EC_SCALAR *r,
+                                           const EC_SCALAR *a);
+
+// ec_GFp_simple_cmp_x_coordinate compares the x (affine) coordinate of |p|, mod
+// the group order, with |r|. It returns zero on error. Otherwise it sets
+// |*out_result| to one iff the values match.
+int ec_GFp_simple_cmp_x_coordinate(int *out_result, const EC_GROUP *group,
+                                   const EC_POINT *p, const BIGNUM *r,
+                                   BN_CTX *ctx);
+
 // method functions in montgomery.c
 int ec_GFp_mont_group_init(EC_GROUP *);
 int ec_GFp_mont_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
diff --git a/src/crypto/fipsmodule/ec/p224-64.c b/src/crypto/fipsmodule/ec/p224-64.c
index 606108fc..49d5328f 100644
--- a/src/crypto/fipsmodule/ec/p224-64.c
+++ b/src/crypto/fipsmodule/ec/p224-64.c
@@ -203,13 +203,6 @@ static void p224_felem_to_bin28(uint8_t out[28], const p224_felem in) {
   }
 }
 
-// From internal representation to OpenSSL BIGNUM
-static BIGNUM *p224_felem_to_BN(BIGNUM *out, const p224_felem in) {
-  p224_felem_bytearray b_out;
-  p224_felem_to_bin28(b_out, in);
-  return BN_le2bn(b_out, sizeof(b_out), out);
-}
-
 static void p224_generic_to_felem(p224_felem out, const EC_FELEM *in) {
   p224_bin28_to_felem(out, in->bytes);
 }
@@ -917,7 +910,7 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,
 
       if (!skip) {
         p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                  tmp[0], tmp[1], tmp[2]);
+                       tmp[0], tmp[1], tmp[2]);
       } else {
         OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
         skip = 0;
@@ -951,7 +944,7 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,
 
       if (!skip) {
         p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
-                  tmp[0], tmp[1], tmp[2]);
+                       tmp[0], tmp[1], tmp[2]);
       } else {
         OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
         skip = 0;
@@ -971,7 +964,8 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,
 // Takes the Jacobian coordinates (X, Y, Z) of a point and returns
 // (X', Y') = (X/Z^2, Y/Z^3)
 static int ec_GFp_nistp224_point_get_affine_coordinates(
-    const EC_GROUP *group, const EC_RAW_POINT *point, BIGNUM *x, BIGNUM *y) {
+    const EC_GROUP *group, const EC_RAW_POINT *point, EC_FELEM *x,
+    EC_FELEM *y) {
   if (ec_GFp_simple_is_at_infinity(group, point)) {
     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
     return 0;
@@ -990,10 +984,7 @@ static int ec_GFp_nistp224_point_get_affine_coordinates(
     p224_felem_mul(tmp, x_in, z1);
     p224_felem_reduce(x_in, tmp);
     p224_felem_contract(x_out, x_in);
-    if (!p224_felem_to_BN(x, x_out)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-      return 0;
-    }
+    p224_felem_to_generic(x, x_out);
   }
 
   if (y != NULL) {
@@ -1004,15 +995,39 @@ static int ec_GFp_nistp224_point_get_affine_coordinates(
     p224_felem_mul(tmp, y_in, z1);
     p224_felem_reduce(y_in, tmp);
     p224_felem_contract(y_out, y_in);
-    if (!p224_felem_to_BN(y, y_out)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-      return 0;
-    }
+    p224_felem_to_generic(y, y_out);
   }
 
   return 1;
 }
 
+static void ec_GFp_nistp224_add(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+  p224_felem x1, y1, z1, x2, y2, z2;
+  p224_generic_to_felem(x1, &a->X);
+  p224_generic_to_felem(y1, &a->Y);
+  p224_generic_to_felem(z1, &a->Z);
+  p224_generic_to_felem(x2, &b->X);
+  p224_generic_to_felem(y2, &b->Y);
+  p224_generic_to_felem(z2, &b->Z);
+  p224_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
+  p224_felem_to_generic(&r->X, x1);
+  p224_felem_to_generic(&r->Y, y1);
+  p224_felem_to_generic(&r->Z, z1);
+}
+
+static void ec_GFp_nistp224_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_RAW_POINT *a) {
+  p224_felem x, y, z;
+  p224_generic_to_felem(x, &a->X);
+  p224_generic_to_felem(y, &a->Y);
+  p224_generic_to_felem(z, &a->Z);
+  p224_point_double(x, y, z, x, y, z);
+  p224_felem_to_generic(&r->X, x);
+  p224_felem_to_generic(&r->Y, y);
+  p224_felem_to_generic(&r->Z, z);
+}
+
 static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                                        const EC_SCALAR *g_scalar,
                                        const EC_RAW_POINT *p,
@@ -1036,13 +1051,13 @@ static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
     for (size_t j = 2; j <= 16; ++j) {
       if (j & 1) {
         p224_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
-                  p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
-                  0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
-                  p_pre_comp[j - 1][2]);
+                       p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
+                       p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
+                       p_pre_comp[j - 1][2]);
       } else {
-        p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
-                     p_pre_comp[j][2], p_pre_comp[j / 2][0],
-                     p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
+        p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+                          p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
+                          p_pre_comp[j / 2][2]);
       }
     }
   }
@@ -1100,6 +1115,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
   out->group_set_curve = ec_GFp_simple_group_set_curve;
   out->point_get_affine_coordinates =
       ec_GFp_nistp224_point_get_affine_coordinates;
+  out->add = ec_GFp_nistp224_add;
+  out->dbl = ec_GFp_nistp224_dbl;
   out->mul = ec_GFp_nistp224_points_mul;
   out->mul_public = ec_GFp_nistp224_points_mul;
   out->felem_mul = ec_GFp_nistp224_felem_mul;
@@ -1107,6 +1124,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
   out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem;
   out->felem_to_bignum = ec_GFp_nistp224_felem_to_bignum;
   out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+  out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+  out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
 };
 
 #endif  // BORINGSSL_HAS_UINT128 && !SMALL
diff --git a/src/crypto/fipsmodule/ec/p256-x86_64.c b/src/crypto/fipsmodule/ec/p256-x86_64.c
index a4f65156..e7f49099 100644
--- a/src/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/src/crypto/fipsmodule/ec/p256-x86_64.c
@@ -44,6 +44,12 @@ static const BN_ULONG ONE[P256_LIMBS] = {
     TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
 };
 
+// P256_ORDER is the order of the P-256 group, not in Montgomery form.
+static const BN_ULONG P256_ORDER[P256_LIMBS] = {
+    TOBN(0xf3b9cac2, 0xfc632551), TOBN(0xbce6faad, 0xa7179e84),
+    TOBN(0xffffffff, 0xffffffff), TOBN(0xffffffff, 0x00000000),
+};
+
 // Precomputed tables for the default generator
 #include "p256-x86_64-table.h"
 
@@ -289,19 +295,63 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
   ecp_nistz256_point_add(r, r, &h);
 }
 
+typedef union {
+  P256_POINT p;
+  P256_POINT_AFFINE a;
+} p256_point_union_t;
+
+static unsigned calc_first_wvalue(unsigned *index, const uint8_t p_str[33]) {
+  static const unsigned kWindowSize = 7;
+  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+  *index = kWindowSize;
+
+  unsigned wvalue = (p_str[0] << 1) & kMask;
+  return booth_recode_w7(wvalue);
+}
+
+static unsigned calc_wvalue(unsigned *index, const uint8_t p_str[33]) {
+  static const unsigned kWindowSize = 7;
+  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+
+  const unsigned off = (*index - 1) / 8;
+  unsigned wvalue = p_str[off] | p_str[off + 1] << 8;
+  wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
+  *index += kWindowSize;
+
+  return booth_recode_w7(wvalue);
+}
+
+static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_SCALAR *g_scalar,
+                                const EC_RAW_POINT *p_,
+                                const EC_SCALAR *p_scalar,
+                                p256_point_union_t *t, p256_point_union_t *p) {
+  const int p_is_infinity = g_scalar == NULL;
+  if (p_scalar != NULL) {
+    P256_POINT *out = &t->p;
+    if (p_is_infinity) {
+      out = &p->p;
+    }
+
+    ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
+    if (!p_is_infinity) {
+      ecp_nistz256_point_add(&p->p, &p->p, out);
+    }
+  }
+
+  assert(group->field.width == P256_LIMBS);
+  OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
 static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                                     const EC_SCALAR *g_scalar,
                                     const EC_RAW_POINT *p_,
                                     const EC_SCALAR *p_scalar) {
   assert((p_ != NULL) == (p_scalar != NULL));
 
-  static const unsigned kWindowSize = 7;
-  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
-
-  alignas(32) union {
-    P256_POINT p;
-    P256_POINT_AFFINE a;
-  } t, p;
+  alignas(32) p256_point_union_t t, p;
 
   if (g_scalar != NULL) {
     uint8_t p_str[33];
@@ -309,10 +359,8 @@ static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
     p_str[32] = 0;
 
     // First window
-    unsigned wvalue = (p_str[0] << 1) & kMask;
-    unsigned index = kWindowSize;
-
-    wvalue = booth_recode_w7(wvalue);
+    unsigned index = 0;
+    unsigned wvalue = calc_first_wvalue(&index, p_str);
 
     const PRECOMP256_ROW *const precomputed_table =
         (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
@@ -328,12 +376,7 @@ static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
     copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
 
     for (int i = 1; i < 37; i++) {
-      unsigned off = (index - 1) / 8;
-      wvalue = p_str[off] | p_str[off + 1] << 8;
-      wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
-      index += kWindowSize;
-
-      wvalue = booth_recode_w7(wvalue);
+      wvalue = calc_wvalue(&index, p_str);
 
       ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1);
 
@@ -344,28 +387,65 @@ static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
     }
   }
 
-  const int p_is_infinity = g_scalar == NULL;
-  if (p_scalar != NULL) {
-    P256_POINT *out = &t.p;
-    if (p_is_infinity) {
-      out = &p.p;
+  mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
+}
+
+static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
+                                           EC_RAW_POINT *r,
+                                           const EC_SCALAR *g_scalar,
+                                           const EC_RAW_POINT *p_,
+                                           const EC_SCALAR *p_scalar) {
+  assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL);
+
+  alignas(32) p256_point_union_t t, p;
+  uint8_t p_str[33];
+  OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
+  p_str[32] = 0;
+
+  // First window
+  unsigned index = 0;
+  unsigned wvalue = calc_first_wvalue(&index, p_str);
+
+  const PRECOMP256_ROW *const precomputed_table =
+      (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
+
+  // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
+  // is infinity and |ONE| otherwise. |p| was computed from the table, so it
+  // is infinity iff |wvalue >> 1| is zero.
+  if ((wvalue >> 1) != 0) {
+    OPENSSL_memcpy(&p.a, &precomputed_table[0][(wvalue >> 1) - 1], sizeof(p.a));
+    OPENSSL_memcpy(&p.p.Z, ONE, sizeof(p.p.Z));
+  } else {
+    OPENSSL_memset(&p.a, 0, sizeof(p.a));
+    OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
+  }
+
+  if ((wvalue & 1) == 1) {
+    ecp_nistz256_neg(p.p.Y, p.p.Y);
+  }
+
+  for (int i = 1; i < 37; i++) {
+    wvalue = calc_wvalue(&index, p_str);
+
+    if ((wvalue >> 1) == 0) {
+      continue;
     }
 
-    ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
-    if (!p_is_infinity) {
-      ecp_nistz256_point_add(&p.p, &p.p, out);
+    OPENSSL_memcpy(&t.a, &precomputed_table[i][(wvalue >> 1) - 1], sizeof(p.a));
+
+    if ((wvalue & 1) == 1) {
+      ecp_nistz256_neg(t.a.Y, t.a.Y);
     }
+
+    ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
   }
 
-  assert(group->field.width == P256_LIMBS);
-  OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
+  mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
 }
 
 static int ecp_nistz256_get_affine(const EC_GROUP *group,
-                                   const EC_RAW_POINT *point, BIGNUM *x,
-                                   BIGNUM *y) {
+                                   const EC_RAW_POINT *point, EC_FELEM *x,
+                                   EC_FELEM *y) {
   if (ec_GFp_simple_is_at_infinity(group, point)) {
     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
     return 0;
@@ -384,27 +464,44 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group,
   ecp_nistz256_from_mont(z_inv2, z_inv2);
 
   if (x != NULL) {
-    BN_ULONG x_aff[P256_LIMBS];
-    ecp_nistz256_mul_mont(x_aff, z_inv2, point->X.words);
-    if (!bn_set_words(x, x_aff, P256_LIMBS)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-      return 0;
-    }
+    ecp_nistz256_mul_mont(x->words, z_inv2, point->X.words);
   }
 
   if (y != NULL) {
-    BN_ULONG y_aff[P256_LIMBS];
     ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
-    ecp_nistz256_mul_mont(y_aff, z_inv3, point->Y.words);
-    if (!bn_set_words(y, y_aff, P256_LIMBS)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-      return 0;
-    }
+    ecp_nistz256_mul_mont(y->words, z_inv3, point->Y.words);
   }
 
   return 1;
 }
 
+static void ecp_nistz256_add(const EC_GROUP *group, EC_RAW_POINT *r,
+                             const EC_RAW_POINT *a_, const EC_RAW_POINT *b_) {
+  P256_POINT a, b;
+  OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(b.X, b_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(b.Y, b_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(b.Z, b_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+  ecp_nistz256_point_add(&a, &a, &b);
+  OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
+static void ecp_nistz256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+                             const EC_RAW_POINT *a_) {
+  P256_POINT a;
+  OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+  ecp_nistz256_point_double(&a, &a);
+  OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
 static void ecp_nistz256_inv_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
                                      const EC_SCALAR *in) {
   // table[i] stores a power of |in| corresponding to the matching enum value.
@@ -486,18 +583,92 @@ static void ecp_nistz256_inv_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
   }
 }
 
+static int ecp_nistz256_mont_inv_mod_ord_vartime(const EC_GROUP *group,
+                                                 EC_SCALAR *out,
+                                                 const EC_SCALAR *in) {
+  if ((OPENSSL_ia32cap_P[1] & (1 << 28)) == 0) {
+    // No AVX support; fallback to generic code.
+    return ec_GFp_simple_mont_inv_mod_ord_vartime(group, out, in);
+  }
+
+  if (!beeu_mod_inverse_vartime(out->words, in->words, P256_ORDER)) {
+    return 0;
+  }
+
+  // The result should be returned in the Montgomery domain.
+  ec_scalar_to_montgomery(group, out, out);
+  return 1;
+}
+
+static int ecp_nistz256_cmp_x_coordinate(int *out_result, const EC_GROUP *group,
+                                         const EC_POINT *p, const BIGNUM *r,
+                                         BN_CTX *ctx) {
+  *out_result = 0;
+
+  if (ec_GFp_simple_is_at_infinity(group, &p->raw)) {
+    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
+    return 0;
+  }
+
+  BN_ULONG r_words[P256_LIMBS];
+  if (!bn_copy_words(r_words, P256_LIMBS, r)) {
+    return 0;
+  }
+
+  // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
+  // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
+  // not.
+  BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS], X[P256_LIMBS];
+  ecp_nistz256_mul_mont(Z2_mont, p->raw.Z.words, p->raw.Z.words);
+  ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
+  ecp_nistz256_from_mont(X, p->raw.X.words);
+
+  if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
+    *out_result = 1;
+    return 1;
+  }
+
+  // During signing the x coefficient is reduced modulo the group order.
+  // Therefore there is a small possibility, less than 1/2^128, that group_order
+  // < p.x < P. in that case we need not only to compare against |r| but also to
+  // compare against r+group_order.
+
+  // P_MINUS_ORDER is the difference between the field order (p) and the group
+  // order (N). This value is not in the Montgomery domain.
+  static const BN_ULONG P_MINUS_ORDER[P256_LIMBS] = {
+      TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b),
+      TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)};
+
+  if (bn_less_than_words(r_words, P_MINUS_ORDER, P256_LIMBS)) {
+    // We can add in-place, ignoring the carry, because: r + group_order < p <
+    // 2^256
+    bn_add_words(r_words, r_words, P256_ORDER, P256_LIMBS);
+    ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
+    if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
+      *out_result = 1;
+      return 1;
+    }
+  }
+
+  return 1;
+}
+
 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
   out->group_init = ec_GFp_mont_group_init;
   out->group_finish = ec_GFp_mont_group_finish;
   out->group_set_curve = ec_GFp_mont_group_set_curve;
   out->point_get_affine_coordinates = ecp_nistz256_get_affine;
+  out->add = ecp_nistz256_add;
+  out->dbl = ecp_nistz256_dbl;
   out->mul = ecp_nistz256_points_mul;
-  out->mul_public = ecp_nistz256_points_mul;
+  out->mul_public = ecp_nistz256_points_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
   out->felem_sqr = ec_GFp_mont_felem_sqr;
   out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
   out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
   out->scalar_inv_montgomery = ecp_nistz256_inv_mod_ord;
+  out->scalar_inv_montgomery_vartime = ecp_nistz256_mont_inv_mod_ord_vartime;
+  out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;
 };
 
 #endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
diff --git a/src/crypto/fipsmodule/ec/p256-x86_64.h b/src/crypto/fipsmodule/ec/p256-x86_64.h
index 21b461cf..9de32406 100644
--- a/src/crypto/fipsmodule/ec/p256-x86_64.h
+++ b/src/crypto/fipsmodule/ec/p256-x86_64.h
@@ -61,6 +61,16 @@ static inline void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
   ecp_nistz256_mul_mont(res, in, ONE);
 }
 
+// ecp_nistz256_to_mont sets |res| to |in|, converted to Montgomery domain
+// by multiplying with RR = 2^512 mod P precomputed for NIST P256 curve.
+static inline void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
+                                        const BN_ULONG in[P256_LIMBS]) {
+  static const BN_ULONG RR[P256_LIMBS] = {
+      TOBN(0x00000000, 0x00000003), TOBN(0xfffffffb, 0xffffffff),
+      TOBN(0xffffffff, 0xfffffffe), TOBN(0x00000004, 0xfffffffd)};
+  ecp_nistz256_mul_mont(res, in, RR);
+}
+
 
 // P-256 scalar operations.
 //
@@ -79,6 +89,12 @@ void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
 void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
                                const BN_ULONG a[P256_LIMBS], int rep);
 
+// beeu_mod_inverse_vartime sets out = a^-1 mod p using a Euclidean algorithm.
+// Assumption: 0 < a < p < 2^(256) and p is odd.
+int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
+                             const BN_ULONG a[P256_LIMBS],
+                             const BN_ULONG p[P256_LIMBS]);
+
 
 // P-256 point operations.
 //
diff --git a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc
index 8ed1dd4d..ab93dfb6 100644
--- a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc
+++ b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc
@@ -24,8 +24,12 @@
 #include <gtest/gtest.h>
 
 #include <openssl/bn.h>
+#include <openssl/cpu.h>
+#include <openssl/ec.h>
 #include <openssl/mem.h>
+#include <openssl/nid.h>
 
+#include "internal.h"
 #include "../bn/internal.h"
 #include "../../internal.h"
 #include "../../test/file_test.h"
@@ -87,6 +91,64 @@ TEST(P256_X86_64Test, SelectW7) {
   }
 }
 
+TEST(P256_X86_64Test, BEEU) {
+  if ((OPENSSL_ia32cap_P[1] & (1 << 28)) == 0) {
+    // No AVX support; cannot run the BEEU code.
+    return;
+  }
+
+  bssl::UniquePtr<EC_GROUP> group(
+      EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1));
+  ASSERT_TRUE(group);
+
+  BN_ULONG order_words[P256_LIMBS];
+  ASSERT_TRUE(
+      bn_copy_words(order_words, P256_LIMBS, EC_GROUP_get0_order(group.get())));
+
+  BN_ULONG in[P256_LIMBS], out[P256_LIMBS];
+  EC_SCALAR in_scalar, out_scalar, result;
+  OPENSSL_memset(in, 0, sizeof(in));
+
+  // Trying to find the inverse of zero should fail.
+  ASSERT_FALSE(beeu_mod_inverse_vartime(out, in, order_words));
+
+  // kOneMont is 1, in Montgomery form.
+  static const BN_ULONG kOneMont[P256_LIMBS] = {
+      TOBN(0xc46353d, 0x039cdaaf), TOBN(0x43190552, 0x58e8617b),
+      0, 0xffffffff,
+  };
+
+  for (BN_ULONG i = 1; i < 2000; i++) {
+    SCOPED_TRACE(i);
+
+    in[0] = i;
+    if (i >= 1000) {
+      in[1] = i << 8;
+      in[2] = i << 32;
+      in[3] = i << 48;
+    } else {
+      in[1] = in[2] = in[3] = 0;
+    }
+
+    EXPECT_TRUE(bn_less_than_words(in, order_words, P256_LIMBS));
+    ASSERT_TRUE(beeu_mod_inverse_vartime(out, in, order_words));
+    EXPECT_TRUE(bn_less_than_words(out, order_words, P256_LIMBS));
+
+    // Calculate out*in and confirm that it equals one, modulo the order.
+    OPENSSL_memcpy(in_scalar.bytes, in, sizeof(in));
+    OPENSSL_memcpy(out_scalar.bytes, out, sizeof(out));
+    ec_scalar_to_montgomery(group.get(), &in_scalar, &in_scalar);
+    ec_scalar_to_montgomery(group.get(), &out_scalar, &out_scalar);
+    ec_scalar_mul_montgomery(group.get(), &result, &in_scalar, &out_scalar);
+
+    EXPECT_EQ(0, OPENSSL_memcmp(kOneMont, &result, sizeof(kOneMont)));
+
+    // Invert the result and expect to get back to the original value.
+    ASSERT_TRUE(beeu_mod_inverse_vartime(out, out, order_words));
+    EXPECT_EQ(0, OPENSSL_memcmp(in, out, sizeof(in)));
+  }
+}
+
 static bool GetFieldElement(FileTest *t, BN_ULONG out[P256_LIMBS],
                             const char *name) {
   std::vector<uint8_t> bytes;
diff --git a/src/crypto/fipsmodule/ec/scalar.c b/src/crypto/fipsmodule/ec/scalar.c
index 1bd6b024..88678a93 100644
--- a/src/crypto/fipsmodule/ec/scalar.c
+++ b/src/crypto/fipsmodule/ec/scalar.c
@@ -74,3 +74,8 @@ void ec_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
                               const EC_SCALAR *a) {
   group->meth->scalar_inv_montgomery(group, r, a);
 }
+
+int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
+                                     const EC_SCALAR *a) {
+  return group->meth->scalar_inv_montgomery_vartime(group, r, a);
+}
diff --git a/src/crypto/fipsmodule/ec/simple.c b/src/crypto/fipsmodule/ec/simple.c
index 5c637118..8b862ff6 100644
--- a/src/crypto/fipsmodule/ec/simple.c
+++ b/src/crypto/fipsmodule/ec/simple.c
@@ -171,10 +171,6 @@ int ec_GFp_simple_group_get_curve(const EC_GROUP *group, BIGNUM *p, BIGNUM *a,
   return 1;
 }
 
-unsigned ec_GFp_simple_group_get_degree(const EC_GROUP *group) {
-  return BN_num_bits(&group->field);
-}
-
 void ec_GFp_simple_point_init(EC_RAW_POINT *point) {
   OPENSSL_memset(&point->X, 0, sizeof(EC_FELEM));
   OPENSSL_memset(&point->Y, 0, sizeof(EC_FELEM));
@@ -212,222 +208,6 @@ int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *group,
   return 1;
 }
 
-void ec_GFp_simple_add(const EC_GROUP *group, EC_RAW_POINT *out,
-                       const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
-  if (a == b) {
-    ec_GFp_simple_dbl(group, out, a);
-    return;
-  }
-
-
-  // The method is taken from:
-  //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
-  //
-  // Coq transcription and correctness proof:
-  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
-  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
-  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
-                          const EC_FELEM *b) = group->meth->felem_mul;
-  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
-      group->meth->felem_sqr;
-
-  EC_FELEM x_out, y_out, z_out;
-  BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
-  BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);
-
-  // z1z1 = z1z1 = z1**2
-  EC_FELEM z1z1;
-  felem_sqr(group, &z1z1, &a->Z);
-
-  // z2z2 = z2**2
-  EC_FELEM z2z2;
-  felem_sqr(group, &z2z2, &b->Z);
-
-  // u1 = x1*z2z2
-  EC_FELEM u1;
-  felem_mul(group, &u1, &a->X, &z2z2);
-
-  // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
-  EC_FELEM two_z1z2;
-  ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);
-  felem_sqr(group, &two_z1z2, &two_z1z2);
-  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
-  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);
-
-  // s1 = y1 * z2**3
-  EC_FELEM s1;
-  felem_mul(group, &s1, &b->Z, &z2z2);
-  felem_mul(group, &s1, &s1, &a->Y);
-
-  // u2 = x2*z1z1
-  EC_FELEM u2;
-  felem_mul(group, &u2, &b->X, &z1z1);
-
-  // h = u2 - u1
-  EC_FELEM h;
-  ec_felem_sub(group, &h, &u2, &u1);
-
-  BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);
-
-  // z_out = two_z1z2 * h
-  felem_mul(group, &z_out, &h, &two_z1z2);
-
-  // z1z1z1 = z1 * z1z1
-  EC_FELEM z1z1z1;
-  felem_mul(group, &z1z1z1, &a->Z, &z1z1);
-
-  // s2 = y2 * z1**3
-  EC_FELEM s2;
-  felem_mul(group, &s2, &b->Y, &z1z1z1);
-
-  // r = (s2 - s1)*2
-  EC_FELEM r;
-  ec_felem_sub(group, &r, &s2, &s1);
-  ec_felem_add(group, &r, &r, &r);
-
-  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
-
-  // This case will never occur in the constant-time |ec_GFp_simple_mul|.
-  if (!xneq && !yneq && z1nz && z2nz) {
-    ec_GFp_simple_dbl(group, out, a);
-    return;
-  }
-
-  // I = (2h)**2
-  EC_FELEM i;
-  ec_felem_add(group, &i, &h, &h);
-  felem_sqr(group, &i, &i);
-
-  // J = h * I
-  EC_FELEM j;
-  felem_mul(group, &j, &h, &i);
-
-  // V = U1 * I
-  EC_FELEM v;
-  felem_mul(group, &v, &u1, &i);
-
-  // x_out = r**2 - J - 2V
-  felem_sqr(group, &x_out, &r);
-  ec_felem_sub(group, &x_out, &x_out, &j);
-  ec_felem_sub(group, &x_out, &x_out, &v);
-  ec_felem_sub(group, &x_out, &x_out, &v);
-
-  // y_out = r(V-x_out) - 2 * s1 * J
-  ec_felem_sub(group, &y_out, &v, &x_out);
-  felem_mul(group, &y_out, &y_out, &r);
-  EC_FELEM s1j;
-  felem_mul(group, &s1j, &s1, &j);
-  ec_felem_sub(group, &y_out, &y_out, &s1j);
-  ec_felem_sub(group, &y_out, &y_out, &s1j);
-
-  ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
-  ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
-  ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
-  ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
-  ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
-  ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
-}
-
-void ec_GFp_simple_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_RAW_POINT *a) {
-  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
-                          const EC_FELEM *b) = group->meth->felem_mul;
-  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
-      group->meth->felem_sqr;
-
-  if (group->a_is_minus3) {
-    // The method is taken from:
-    //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
-    //
-    // Coq transcription and correctness proof:
-    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
-    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
-    EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
-    // delta = z^2
-    felem_sqr(group, &delta, &a->Z);
-    // gamma = y^2
-    felem_sqr(group, &gamma, &a->Y);
-    // beta = x*gamma
-    felem_mul(group, &beta, &a->X, &gamma);
-
-    // alpha = 3*(x-delta)*(x+delta)
-    ec_felem_sub(group, &ftmp, &a->X, &delta);
-    ec_felem_add(group, &ftmp2, &a->X, &delta);
-
-    ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);
-    ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);
-    felem_mul(group, &alpha, &ftmp, &ftmp2);
-
-    // x' = alpha^2 - 8*beta
-    felem_sqr(group, &r->X, &alpha);
-    ec_felem_add(group, &fourbeta, &beta, &beta);
-    ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);
-    ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);
-    ec_felem_sub(group, &r->X, &r->X, &tmptmp);
-
-    // z' = (y + z)^2 - gamma - delta
-    ec_felem_add(group, &delta, &gamma, &delta);
-    ec_felem_add(group, &ftmp, &a->Y, &a->Z);
-    felem_sqr(group, &r->Z, &ftmp);
-    ec_felem_sub(group, &r->Z, &r->Z, &delta);
-
-    // y' = alpha*(4*beta - x') - 8*gamma^2
-    ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
-    ec_felem_add(group, &gamma, &gamma, &gamma);
-    felem_sqr(group, &gamma, &gamma);
-    felem_mul(group, &r->Y, &alpha, &r->Y);
-    ec_felem_add(group, &gamma, &gamma, &gamma);
-    ec_felem_sub(group, &r->Y, &r->Y, &gamma);
-  } else {
-    // The method is taken from:
-    //   http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
-    //
-    // Coq transcription and correctness proof:
-    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
-    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
-    EC_FELEM xx, yy, yyyy, zz;
-    felem_sqr(group, &xx, &a->X);
-    felem_sqr(group, &yy, &a->Y);
-    felem_sqr(group, &yyyy, &yy);
-    felem_sqr(group, &zz, &a->Z);
-
-    // s = 2*((x_in + yy)^2 - xx - yyyy)
-    EC_FELEM s;
-    ec_felem_add(group, &s, &a->X, &yy);
-    felem_sqr(group, &s, &s);
-    ec_felem_sub(group, &s, &s, &xx);
-    ec_felem_sub(group, &s, &s, &yyyy);
-    ec_felem_add(group, &s, &s, &s);
-
-    // m = 3*xx + a*zz^2
-    EC_FELEM m;
-    felem_sqr(group, &m, &zz);
-    felem_mul(group, &m, &group->a, &m);
-    ec_felem_add(group, &m, &m, &xx);
-    ec_felem_add(group, &m, &m, &xx);
-    ec_felem_add(group, &m, &m, &xx);
-
-    // x_out = m^2 - 2*s
-    felem_sqr(group, &r->X, &m);
-    ec_felem_sub(group, &r->X, &r->X, &s);
-    ec_felem_sub(group, &r->X, &r->X, &s);
-
-    // z_out = (y_in + z_in)^2 - yy - zz
-    ec_felem_add(group, &r->Z, &a->Y, &a->Z);
-    felem_sqr(group, &r->Z, &r->Z);
-    ec_felem_sub(group, &r->Z, &r->Z, &yy);
-    ec_felem_sub(group, &r->Z, &r->Z, &zz);
-
-    // y_out = m*(s-x_out) - 8*yyyy
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_sub(group, &r->Y, &s, &r->X);
-    felem_mul(group, &r->Y, &r->Y, &m);
-    ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
-  }
-}
-
 void ec_GFp_simple_invert(const EC_GROUP *group, EC_RAW_POINT *point) {
   ec_felem_neg(group, &point->Y, &point->Y);
 }
@@ -570,3 +350,52 @@ int ec_GFp_simple_cmp(const EC_GROUP *group, const EC_RAW_POINT *a,
   // The points are equal.
   return 0;
 }
+
+int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group,
+                                           EC_SCALAR *out,
+                                           const EC_SCALAR *in) {
+  // This implementation (in fact) runs in constant time,
+  // even though for this interface it is not mandatory.
+
+  // out = in^-1 in the Montgomery domain. This is
+  // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
+  // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
+  // equivalent and slightly more efficient.
+  ec_scalar_inv_montgomery(group, out, in);
+  ec_scalar_from_montgomery(group, out, out);
+  return 1;
+}
+
+// Compares the x (affine) coordinate of the point p with x.
+// Return 1 on success 0 otherwise
+int ec_GFp_simple_cmp_x_coordinate(int *out_result, const EC_GROUP *group,
+                                   const EC_POINT *p, const BIGNUM *r,
+                                   BN_CTX *ctx) {
+  *out_result = 0;
+  int ret = 0;
+  BN_CTX_start(ctx);
+
+  BIGNUM *X = BN_CTX_get(ctx);
+  if (X == NULL) {
+    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+    goto out;
+  }
+
+  if (!EC_POINT_get_affine_coordinates_GFp(group, p, X, NULL, ctx)) {
+    OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
+    goto out;
+  }
+
+  if (!ec_field_element_to_scalar(group, X)) {
+    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+    goto out;
+  }
+
+  // The signature is correct iff |X| is equal to |r|.
+  *out_result = (BN_ucmp(X, r) == 0);
+  ret = 1;
+
+out:
+  BN_CTX_end(ctx);
+  return ret;
+}
diff --git a/src/crypto/fipsmodule/ec/simple_mul.c b/src/crypto/fipsmodule/ec/simple_mul.c
index 93ed0a8f..e05f491a 100644
--- a/src/crypto/fipsmodule/ec/simple_mul.c
+++ b/src/crypto/fipsmodule/ec/simple_mul.c
@@ -21,12 +21,12 @@
 #include "../../internal.h"
 
 
-static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
-                                     const EC_RAW_POINT *p,
-                                     const EC_SCALAR *scalar) {
+static void ec_GFp_mont_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
+                                   const EC_RAW_POINT *p,
+                                   const EC_SCALAR *scalar) {
   // This is a generic implementation for uncommon curves that not do not
   // warrant a tuned one. It uses unsigned digits so that the doubling case in
-  // |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity.
+  // |ec_GFp_mont_add| is always unreachable, erring on safety and simplicity.
 
   // Compute a table of the first 32 multiples of |p| (including infinity).
   EC_RAW_POINT precomp[32];
@@ -34,9 +34,9 @@ static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
   ec_GFp_simple_point_copy(&precomp[1], p);
   for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
     if (j & 1) {
-      ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
+      ec_GFp_mont_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
     } else {
-      ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]);
+      ec_GFp_mont_dbl(group, &precomp[j], &precomp[j / 2]);
     }
   }
 
@@ -45,7 +45,7 @@ static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
   int r_is_at_infinity = 1;
   for (unsigned i = bits - 1; i < bits; i--) {
     if (!r_is_at_infinity) {
-      ec_GFp_simple_dbl(group, r, r);
+      ec_GFp_mont_dbl(group, r, r);
     }
     if (i % 5 == 0) {
       // Compute the next window value.
@@ -70,7 +70,7 @@ static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
   }
@@ -79,21 +79,21 @@ static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
   }
 }
 
-void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                       const EC_SCALAR *p_scalar) {
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar) {
   assert(g_scalar != NULL || p_scalar != NULL);
   if (p_scalar == NULL) {
-    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
   } else if (g_scalar == NULL) {
-    ec_GFp_simple_mul_single(group, r, p, p_scalar);
+    ec_GFp_mont_mul_single(group, r, p, p_scalar);
   } else {
     // Support constant-time two-point multiplication for compatibility.  This
     // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
     // the naive way.
-    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
     EC_RAW_POINT tmp;
-    ec_GFp_simple_mul_single(group, &tmp, p, p_scalar);
-    ec_GFp_simple_add(group, r, r, &tmp);
+    ec_GFp_mont_mul_single(group, &tmp, p, p_scalar);
+    ec_GFp_mont_add(group, r, r, &tmp);
   }
 }
diff --git a/src/crypto/fipsmodule/ec/wnaf.c b/src/crypto/fipsmodule/ec/wnaf.c
index 145caa0c..c0c28099 100644
--- a/src/crypto/fipsmodule/ec/wnaf.c
+++ b/src/crypto/fipsmodule/ec/wnaf.c
@@ -151,9 +151,9 @@ static void compute_precomp(const EC_GROUP *group, EC_RAW_POINT *out,
                             const EC_RAW_POINT *p, size_t len) {
   ec_GFp_simple_point_copy(&out[0], p);
   EC_RAW_POINT two_p;
-  ec_GFp_simple_dbl(group, &two_p, p);
+  ec_GFp_mont_dbl(group, &two_p, p);
   for (size_t i = 1; i < len; i++) {
-    ec_GFp_simple_add(group, &out[i], &out[i - 1], &two_p);
+    ec_GFp_mont_add(group, &out[i], &out[i - 1], &two_p);
   }
 }
 
@@ -168,15 +168,15 @@ static void lookup_precomp(const EC_GROUP *group, EC_RAW_POINT *out,
   }
 }
 
-// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|.
+// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_mont_mul_public|.
 #define EC_WNAF_WINDOW_BITS 4
 
-// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|.
+// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_mont_mul_public|.
 #define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1))
 
-void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
-                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                              const EC_SCALAR *p_scalar) {
+void ec_GFp_mont_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+                            const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                            const EC_SCALAR *p_scalar) {
   size_t bits = BN_num_bits(&group->order);
   size_t wNAF_len = bits + 1;
 
@@ -197,7 +197,7 @@ void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
   int r_is_at_infinity = 1;
   for (size_t k = wNAF_len - 1; k < wNAF_len; k--) {
     if (!r_is_at_infinity) {
-      ec_GFp_simple_dbl(group, r, r);
+      ec_GFp_mont_dbl(group, r, r);
     }
 
     if (g_wNAF[k] != 0) {
@@ -206,7 +206,7 @@ void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
 
@@ -216,7 +216,7 @@ void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
   }
diff --git a/src/crypto/fipsmodule/ecdsa/ecdsa.c b/src/crypto/fipsmodule/ecdsa/ecdsa.c
index f3ce2147..80371c3e 100644
--- a/src/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/src/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -98,40 +98,6 @@ static void digest_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
                           order->width);
 }
 
-// field_element_to_scalar reduces |r| modulo |group->order|. |r| must
-// previously have been reduced modulo |group->field|.
-static int field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
-  // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
-  // can reduce by performing at most one subtraction.
-  //
-  // Proof: We only work with prime order curves, so the number of points on
-  // the curve is the order. Thus Hasse's theorem gives:
-  //
-  //     |order - (p + 1)| <= 2×sqrt(p)
-  //         p + 1 - order <= 2×sqrt(p)
-  //     p + 1 - 2×sqrt(p) <= order
-  //       p + 1 - 2×(p/4)  < order       (p/4 > sqrt(p) for p >= 17)
-  //         p/2 < p/2 + 1  < order
-  //                     p  < 2×order
-  //
-  // Additionally, one can manually check this property for built-in curves. It
-  // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
-  //
-  // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
-  // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
-  // to be constant-time for signing? |r| is the x-coordinate for kG, which is
-  // public unless k was rerolled because |s| was zero.
-  assert(!BN_is_negative(r));
-  assert(BN_cmp(r, &group->field) < 0);
-  if (BN_cmp(r, &group->order) >= 0 &&
-      !BN_sub(r, r, &group->order)) {
-    return 0;
-  }
-  assert(!BN_is_negative(r));
-  assert(BN_cmp(r, &group->order) < 0);
-  return 1;
-}
-
 ECDSA_SIG *ECDSA_SIG_new(void) {
   ECDSA_SIG *sig = OPENSSL_malloc(sizeof(ECDSA_SIG));
   if (sig == NULL) {
@@ -193,12 +159,6 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len,
   }
   int ret = 0;
   EC_POINT *point = NULL;
-  BN_CTX_start(ctx);
-  BIGNUM *X = BN_CTX_get(ctx);
-  if (X == NULL) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-    goto err;
-  }
 
   EC_SCALAR r, s, u1, u2, s_inv_mont, m;
   if (BN_is_zero(sig->r) ||
@@ -210,11 +170,7 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len,
   }
 
   // s_inv_mont = s^-1 in the Montgomery domain. This is
-  // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
-  // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
-  // equivalent and slightly more efficient.
-  ec_scalar_inv_montgomery(group, &s_inv_mont, &s);
-  ec_scalar_from_montgomery(group, &s_inv_mont, &s_inv_mont);
+  ec_scalar_inv_montgomery_vartime(group, &s_inv_mont, &s);
 
   // u1 = m * s^-1 mod order
   // u2 = r * s^-1 mod order
@@ -234,16 +190,14 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len,
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
     goto err;
   }
-  if (!EC_POINT_get_affine_coordinates_GFp(group, point, X, NULL, ctx)) {
+
+  int match;
+  if (!ec_cmp_x_coordinate(&match, group, point, sig->r, ctx)) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
     goto err;
   }
-  if (!field_element_to_scalar(group, X)) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-    goto err;
-  }
-  // The signature is correct iff |X| is equal to |sig->r|.
-  if (BN_ucmp(X, sig->r) != 0) {
+
+  if (!match) {
     OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
     goto err;
   }
@@ -251,7 +205,6 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len,
   ret = 1;
 
 err:
-  BN_CTX_end(ctx);
   BN_CTX_free(ctx);
   EC_POINT_free(point);
   return ret;
@@ -320,7 +273,7 @@ static int ecdsa_sign_setup(const EC_KEY *eckey, BN_CTX *ctx,
       goto err;
     }
 
-    if (!field_element_to_scalar(group, r)) {
+    if (!ec_field_element_to_scalar(group, r)) {
       goto err;
     }
   } while (BN_is_zero(r));
diff --git a/src/crypto/fipsmodule/ecdsa/ecdsa_test.cc b/src/crypto/fipsmodule/ecdsa/ecdsa_test.cc
index 258c128c..4c95df9e 100644
--- a/src/crypto/fipsmodule/ecdsa/ecdsa_test.cc
+++ b/src/crypto/fipsmodule/ecdsa/ecdsa_test.cc
@@ -68,6 +68,45 @@
 #include "../../test/file_test.h"
 
 
+static bssl::UniquePtr<BIGNUM> HexToBIGNUM(const char *hex) {
+  BIGNUM *bn = nullptr;
+  BN_hex2bn(&bn, hex);
+  return bssl::UniquePtr<BIGNUM>(bn);
+}
+
+// Though we do not support secp160r1, it is reachable from the deprecated
+// custom curve APIs and has some unique properties (n is larger than p with the
+// difference crossing a word boundary on 32-bit), so test it explicitly.
+static bssl::UniquePtr<EC_GROUP> NewSecp160r1Group() {
+  static const char kP[] = "ffffffffffffffffffffffffffffffff7fffffff";
+  static const char kA[] = "ffffffffffffffffffffffffffffffff7ffffffc";
+  static const char kB[] = "1c97befc54bd7a8b65acf89f81d4d4adc565fa45";
+  static const char kX[] = "4a96b5688ef573284664698968c38bb913cbfc82";
+  static const char kY[] = "23a628553168947d59dcc912042351377ac5fb32";
+  static const char kN[] = "0100000000000000000001f4c8f927aed3ca752257";
+
+  bssl::UniquePtr<BIGNUM> p = HexToBIGNUM(kP), a = HexToBIGNUM(kA),
+                          b = HexToBIGNUM(kB), x = HexToBIGNUM(kX),
+                          y = HexToBIGNUM(kY), n = HexToBIGNUM(kN);
+  if (!p || !a || !b || !x || !y || !n) {
+    return nullptr;
+  }
+
+  bssl::UniquePtr<EC_GROUP> group(
+      EC_GROUP_new_curve_GFp(p.get(), a.get(), b.get(), nullptr));
+  if (!group) {
+    return nullptr;
+  }
+  bssl::UniquePtr<EC_POINT> g(EC_POINT_new(group.get()));
+  if (!g ||
+      !EC_POINT_set_affine_coordinates_GFp(group.get(), g.get(), x.get(),
+                                           y.get(), nullptr) ||
+      !EC_GROUP_set_generator(group.get(), g.get(), n.get(), BN_value_one())) {
+    return nullptr;
+  }
+  return group;
+}
+
 enum API {
   kEncodedAPI,
   kRawAPI,
@@ -151,13 +190,18 @@ TEST(ECDSATest, BuiltinCurves) {
       { NID_X9_62_prime256v1, "secp256r1" },
       { NID_secp384r1, "secp384r1" },
       { NID_secp521r1, "secp521r1" },
+      { NID_secp160r1, "secp160r1" },
   };
 
   for (const auto &curve : kCurves) {
     SCOPED_TRACE(curve.name);
 
-    int nid = curve.nid;
-    bssl::UniquePtr<EC_GROUP> group(EC_GROUP_new_by_curve_name(nid));
+    bssl::UniquePtr<EC_GROUP> group;
+    if (curve.nid == NID_secp160r1) {
+      group = NewSecp160r1Group();
+    } else {
+      group.reset(EC_GROUP_new_by_curve_name(curve.nid));
+    }
     ASSERT_TRUE(group);
     const BIGNUM *order = EC_GROUP_get0_order(group.get());
 
@@ -278,6 +322,9 @@ static bssl::UniquePtr<EC_GROUP> GetCurve(FileTest *t, const char *key) {
   if (curve_name == "P-521") {
     return bssl::UniquePtr<EC_GROUP>(EC_GROUP_new_by_curve_name(NID_secp521r1));
   }
+  if (curve_name == "secp160r1") {
+    return NewSecp160r1Group();
+  }
 
   ADD_FAILURE() << "Unknown curve: " << curve_name;
   return nullptr;
diff --git a/src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt b/src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt
index aa2fbd3c..03975c8c 100644
--- a/src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt
+++ b/src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt
@@ -2431,3 +2431,346 @@ Y = 01862ed4f9d235afcc4e6b45e491da363104d4db7b97f12d869c40ab09a3c8c72519a9712ca7
 Digest = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
 R = 00ec0b91fa4386a8acdc0e46dd9c1d1775abbe0da8ead424aa4ace58e284a5be00e2c1ef95b6f4d861615564e1e7305656567f95275ce63b534420eae77ec37492c2
 S = 01e1099fb389db498ab4cf23b4f06a74b9326878ae3c76ea13832e50702b30fe8303093a59cc9a0995f1dfc15e6f7dabca8a2acaf03ec005447d29fb429a252064ec
+
+
+# The following tests are intended to stress the final comparison in ECDSA.
+# ECDSA verification computes some curve point (x, y), picking the fully-reduced
+# representive of x mod p, and checking that x mod n is r. (n is the order of
+# the group and p defines the underlying prime field.)
+#
+# This makes the computation sensitive to values near n and p, and which of n or
+# p is larger. Additionally, there is an optimization that performs the
+# comparison mod p rather than n and compensates for the difference.
+#
+# These tests were generated by picking a target value of r and x, adjusting
+# both until x corresponded to a point on the curve, and then computing the
+# public key by solving for P in ECDSA's (x, y) = u1*G + u2*P. The digest is the
+# hash of "hello, world" with the suitably-sized SHA-2 hash, so the test vectors
+# are suitable for both message- and digest-based APIs.
+#
+# "x" in the comments refer to the x-coordinate of the computed point, not that
+# of the public key.
+
+# r = 3, x = 3 is valid.
+Curve = P-224
+X = f43eeb550591547d6a6479726b72be181d4ea26dea5516ae1c0b0ab3
+Y = e127deeb94536c67793ac172ba31f3a6f81efbbf2ab3d7868d0cc9f9
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00000000000000000000000000000000000000000000000000000003
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+
+# r = 3 + n, x = 3 is invalid. r must already be reduced.
+Curve = P-224
+X = f43eeb550591547d6a6479726b72be181d4ea26dea5516ae1c0b0ab3
+Y = e127deeb94536c67793ac172ba31f3a6f81efbbf2ab3d7868d0cc9f9
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a40
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+Invalid =
+
+# r = n-1, x = n-1 is the largest x without a reduction.
+Curve = P-224
+X = 32acb8d348f6ec350822227c4a90048733640317f7833dc9093a78f1
+Y = dd45cab24ef90b8d6437f128437ea847036a8912322a6738dccceaa3
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3c
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+
+# r = n-2, x = n-1 is incorrect.
+Curve = P-224
+X = 32acb8d348f6ec350822227c4a90048733640317f7833dc9093a78f1
+Y = dd45cab24ef90b8d6437f128437ea847036a8912322a6738dccceaa3
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3b
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+Invalid =
+
+# r = 3, x = n+3 is the smallest x with a reduction.
+Curve = P-224
+X = d7afcc97eefcf32becf100cf967588c68f9c149fa18344ac08e245b4
+Y = 3b853f6c6d955587d9ac080c8f10bf355f9992a0103a27aa30dac7e8
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00000000000000000000000000000000000000000000000000000003
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+
+# r = 4, x = n+3 is incorrect.
+Curve = P-224
+X = d7afcc97eefcf32becf100cf967588c68f9c149fa18344ac08e245b4
+Y = 3b853f6c6d955587d9ac080c8f10bf355f9992a0103a27aa30dac7e8
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00000000000000000000000000000000000000000000000000000004
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+Invalid =
+
+# r = p-3-n, x = p-3 is the largest valid x.
+Curve = P-224
+X = cdacee2255448c72d1558eb866b14831acef41ed348bd938cce655be
+Y = d0b409693b64f3597468ae5535338052436158a6771c6318b68025de
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0000000000000000000000000000e95c1f470fc1ec22d6baa3a3d5c1
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+
+# r = p-n+3, x = 3 is incorrect. r is too large to compare r+n with x.
+Curve = P-224
+X = ef9169ef146a19c9a7220c6f25f597e7345e25fa1267712b9a20e30d
+Y = 454b19373a67ad81ca37ba8de9a96e881896df7160ba740f4c7373b9
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0000000000000000000000000000e95c1f470fc1ec22d6baa3a3d5c7
+S = ffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3a
+Invalid =
+
+# r = 5, x = 5 is valid.
+Curve = P-256
+X = 264d796a0dab9b376d34eea6fe297dde1c7b73e53944bc96c8f1e8a6850bb6c9
+Y = cf5308020eed460c649ddae61d4ef8bb79958113f106befaf4f18876d12a5e64
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0000000000000000000000000000000000000000000000000000000000000005
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+
+# r = 5 + n, x = 5 is invalid. r must already be reduced.
+Curve = P-256
+X = 264d796a0dab9b376d34eea6fe297dde1c7b73e53944bc96c8f1e8a6850bb6c9
+Y = cf5308020eed460c649ddae61d4ef8bb79958113f106befaf4f18876d12a5e64
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632556
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+Invalid =
+
+# r = n-2, x = n-2 is the largest x without a reduction.
+Curve = P-256
+X = 50a50c01132bf79e42b31fb278f7317b29515e9e1c973a41266b69048826fb8e
+Y = aac53e7df37b5eb25ce4ddb705fc7135c6b1e00a7f56e30744f62f258afa5537
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+
+# r = n-3, x = n-2 is incorrect.
+Curve = P-256
+X = 50a50c01132bf79e42b31fb278f7317b29515e9e1c973a41266b69048826fb8e
+Y = aac53e7df37b5eb25ce4ddb705fc7135c6b1e00a7f56e30744f62f258afa5537
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+Invalid =
+
+# r = 3, x = n+3 is the smallest x with a reduction.
+Curve = P-256
+X = ce24c99032d52ac6ead23c0ae3ec68ef41e51a281fd457808c83136d7dcce90e
+Y = 8f7a154b551e9f39c59279357aa491b2a62bdebc2bb78613883fc72936c057e0
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0000000000000000000000000000000000000000000000000000000000000003
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+
+# r = 4, x = n+3 is incorrect.
+Curve = P-256
+X = ce24c99032d52ac6ead23c0ae3ec68ef41e51a281fd457808c83136d7dcce90e
+Y = 8f7a154b551e9f39c59279357aa491b2a62bdebc2bb78613883fc72936c057e0
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0000000000000000000000000000000000000000000000000000000000000004
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+Invalid =
+
+# r = p-3-n, x = p-3 is the largest valid x.
+Curve = P-256
+X = 768a0d300a595005a520130e50927d403395c8e1e40be997b48fc048410f7cdb
+Y = 16f217d8e1c02bd887e5de388a17783b182e61b5d534152dc2c4be8d75fdd706
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 000000000000000000000000000000004319055358e8617b0c46353d039cdaab
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+
+# r = p-n+5, x = 5 is incorrect. r is too large to compare r+n with x.
+Curve = P-256
+X = 0ec505bc19b14a43e05678cccf07a443d3e871a2e19b68a4da91859a0650f324
+Y = 77300e4f64e9982d94dff5d294428bb37cc9be66117cae9c389d2d495f68b987
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 000000000000000000000000000000004319055358e8617b0c46353d039cdab3
+S = ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254e
+Invalid =
+
+# r = 2, x = 2 is valid.
+Curve = P-384
+X = 016d2db67561bc126ad6c344d6eeb2713a9e2892c649af0f015c6b7617f160c8a3b3a88add669d7155025073c5ac5b4f
+Y = 43bf2ed0088af08645c80aa0a24a567a94ba2d794e9689d3ad4b185bc5d2dd008333e2dd2ebb5069a9b32251a3cac71e
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+
+# r = 2 + n, x = 2 is invalid. r must already be reduced.
+Curve = P-384
+X = 016d2db67561bc126ad6c344d6eeb2713a9e2892c649af0f015c6b7617f160c8a3b3a88add669d7155025073c5ac5b4f
+Y = 43bf2ed0088af08645c80aa0a24a567a94ba2d794e9689d3ad4b185bc5d2dd008333e2dd2ebb5069a9b32251a3cac71e
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52975
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+Invalid =
+
+# r = n-1, x = n-1 is the largest x without a reduction.
+Curve = P-384
+X = b5b375264c09acf145ca91d12ab10a096092a41ec43f4d718e129ea1c12b2dea62c7785efc52f46f009fb1dba133e811
+Y = bc0b2af172b4b3068d032a798080e76f4d56f72069519e3c19a43682a41794e52cb3ca139348d6bbc923e6a4f7945cb1
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52972
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+
+# r = n-2, x = n-1 is incorrect.
+Curve = P-384
+X = b5b375264c09acf145ca91d12ab10a096092a41ec43f4d718e129ea1c12b2dea62c7785efc52f46f009fb1dba133e811
+Y = bc0b2af172b4b3068d032a798080e76f4d56f72069519e3c19a43682a41794e52cb3ca139348d6bbc923e6a4f7945cb1
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52971
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+Invalid =
+
+# r = 2, x = n+2 is the smallest x with a reduction.
+Curve = P-384
+X = 01b54a697305092bac2939fb906d7471b411c4eba8654169166a5da3810e1fc96795df921f7abbf519be4a027435176c
+Y = a19012a3518773d508106d4153adee43c3c384fa62ce36a4addea08f593ec9c76b09a6b9c69d29bd7d47eb48e167dd2f
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+
+# r = 3, x = n+2 is incorrect.
+Curve = P-384
+X = 01b54a697305092bac2939fb906d7471b411c4eba8654169166a5da3810e1fc96795df921f7abbf519be4a027435176c
+Y = a19012a3518773d508106d4153adee43c3c384fa62ce36a4addea08f593ec9c76b09a6b9c69d29bd7d47eb48e167dd2f
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+Invalid =
+
+# r = p-1-n, x = p-1 is the largest valid x.
+Curve = P-384
+X = c4fd8e68006b83f7b7b20b731ae405813aa05f6e57374589b36ae1cecd1d49cae1418c22f398188bcf4ef02e89fe7394
+Y = dd1164b3707f59e05129fa228b8448031db159985f035d93470dc42b3ab4129f0760c46cf201d42e73a7e33ba7402ea6
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = 000000000000000000000000000000000000000000000000389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68b
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+
+# r = p-n+2, x = 2 is incorrect. r is too large to compare r+n with x.
+Curve = P-384
+X = 4e5e4f1a6e97059a6cf2f4e8129e5c7c64cb84f9994a41ff5bf30b29c1bf5ba6898627c91a23c73e05cd1a43c8f908c0
+Y = 06a0aed7f1e63a728f87dbd5360a67571a076ab0b4cde81b10d499959814ddb3a8c7854b0bbfa87cc272f90bca2a2254
+Digest = 1fcdb6059ce05172a26bbe2a3ccc88ed5a8cd5fc53edfd9053304d429296a6da23b1cd9e5c9ed3bb34f00418a70cdb7e
+R = 000000000000000000000000000000000000000000000000389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68e
+S = ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970
+Invalid =
+
+# r = 1, x = 1 is valid.
+Curve = P-521
+X = 00f07e0b593332d09ec4fd0bae93f648a3da04dd224faae3f64cc490ec8fce3a6fe53d1b2c9e326be076cafb921b7e3f8b2288db491819522d65472870668c3808c9
+Y = 018e42509aca542a8de421589c38ba653e8cfd69322336217042a9dc0f67f6d7ae2cd4e385f480ffaf8981f715c7ca3765d9867dfd5a02947b0895f82eaf8b257e88
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+
+# r = 1 + n, x = 1 is invalid. r must already be reduced.
+Curve = P-521
+X = 00f07e0b593332d09ec4fd0bae93f648a3da04dd224faae3f64cc490ec8fce3a6fe53d1b2c9e326be076cafb921b7e3f8b2288db491819522d65472870668c3808c9
+Y = 018e42509aca542a8de421589c38ba653e8cfd69322336217042a9dc0f67f6d7ae2cd4e385f480ffaf8981f715c7ca3765d9867dfd5a02947b0895f82eaf8b257e88
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e9138640a
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+Invalid =
+
+# r = n-2, x = n-2 is the largest x without a reduction.
+Curve = P-521
+X = 002a61afb982e49f030dd4e6ba0e495703abe0442b1283ee693fffc1b558f49f0a4cb4f138ea0604e667958495b86c61f358dce7e7f170da47372be3e4168408a260
+Y = 01baa19e8929fc8e7208e854e706a3d7f21479d1f6922a65ae3490fd5f52ae6580513b1fdd5bee927d002a9608abbb925b6727bdc110a3145fc8622d1fa8154c82d8
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386407
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+
+# r = n-3, x = n-2 is incorrect.
+Curve = P-521
+X = 002a61afb982e49f030dd4e6ba0e495703abe0442b1283ee693fffc1b558f49f0a4cb4f138ea0604e667958495b86c61f358dce7e7f170da47372be3e4168408a260
+Y = 01baa19e8929fc8e7208e854e706a3d7f21479d1f6922a65ae3490fd5f52ae6580513b1fdd5bee927d002a9608abbb925b6727bdc110a3145fc8622d1fa8154c82d8
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+Invalid =
+
+# r = 1, x = n+1 is the smallest x with a reduction.
+Curve = P-521
+X = 0049bbb2d3267a6eab2c59fac5b138b9e9c383db6637fcfe5d9f430e4c4c2ba0332340975448bd86c92a55c1a8288adf7f774096022419aa8c497499dafee7b93257
+Y = 00bb52fd444ec497ce228135f2498d40fb84eb6f674df1245d3aaac3c75b55ff5fff8e90b6f0189a3132cb9fd8d6e74fda5866fe2b9fc7484c628fde97e0b00f2b67
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+
+# r = 2, x = n+1 is incorrect.
+Curve = P-521
+X = 0049bbb2d3267a6eab2c59fac5b138b9e9c383db6637fcfe5d9f430e4c4c2ba0332340975448bd86c92a55c1a8288adf7f774096022419aa8c497499dafee7b93257
+Y = 00bb52fd444ec497ce228135f2498d40fb84eb6f674df1245d3aaac3c75b55ff5fff8e90b6f0189a3132cb9fd8d6e74fda5866fe2b9fc7484c628fde97e0b00f2b67
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+Invalid =
+
+# r = p-1-n, x = p-1 is the largest valid x.
+Curve = P-521
+X = 00f651d53d45bf6fd55a5f184e580d11259bc65200387dbc1bf7fb867d2d12a207d2962204ccf38e9d37d23ed95bd01ec576c457127766ecb8ad00342a476ea82078
+Y = 0196caedf64fbaa9a12c16836e0564e36f733957375706edb5f32911991a994c2d6a1ea5db2ee764835a9d6aff379e195f722b48e8d2b60fc50de2a5160c77c3f06c
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 00000000000000000000000000000000000000000000000000000000000000000005ae79787c40d069948033feb708f65a2fc44a36477663b851449048e16ec79bf5
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+
+# r = p-n+1, x = 1 is incorrect. r is too large to compare r+n with x.
+Curve = P-521
+X = 009eeb7f956230c3744ca5b683f413009363107aad18a027fa7af6ac07a699911e94143d3ef00c0062d4187c2ea74dc9322c05431a6b7fed51ee71b047ce3a0e967c
+Y = 007d2c089a6720f7c7886ce8aa6aeb9b821adde0eb025ef63c62d37c32b2d6823c857ce7743b8181c35c8f34e6aeb4487dd693e01d69dfe883c07c25ebe89bdc4d56
+Digest = 8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9
+R = 00000000000000000000000000000000000000000000000000000000000000000005ae79787c40d069948033feb708f65a2fc44a36477663b851449048e16ec79bf7
+S = 01fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386406
+Invalid =
+
+# Although we do not support secp160r1, all our built-in curves have p > n,
+# while n > p is reachable from custom curve logic. Moreover, p and n have
+# different word widths on 32-bit machines. We include some test vectors to
+# cover these cases.
+#
+# When n > p, the reduction mod n never occurs, but an optimized implementation,
+# working mod p, may incorrectly accept, e.g., r = p+4 instead of r = 4.
+
+# r = 4, x = 4 is valid.
+Curve = secp160r1
+X = 39891bd61138e775cd012518ff00f59ae01c4733
+Y = 25026b77b1c44affb1592dcf711b4290e9404c9f
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 000000000000000000000000000000000000000004
+S = 0100000000000000000001f4c8f927aed3ca752254
+
+# r = 4 + n, x = 4 is invalid. r must already be reduced.
+Curve = secp160r1
+X = 39891bd61138e775cd012518ff00f59ae01c4733
+Y = 25026b77b1c44affb1592dcf711b4290e9404c9f
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 0100000000000000000001f4c8f927aed3ca75225b
+S = 0100000000000000000001f4c8f927aed3ca752254
+Invalid =
+
+# r = p-3, x = p-3 are the largest valid values of x and r.
+Curve = secp160r1
+X = d88d902a0d8d942333c7b846a933d4794fcb5807
+Y = d24c4f405689b86cd5c61fe104e6365d254d5222
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00ffffffffffffffffffffffffffffffff7ffffffc
+S = 0100000000000000000001f4c8f927aed3ca752254
+
+# r = p-4, x = p-3 is incorrect.
+Curve = secp160r1
+X = d88d902a0d8d942333c7b846a933d4794fcb5807
+Y = d24c4f405689b86cd5c61fe104e6365d254d5222
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00ffffffffffffffffffffffffffffffff7ffffffb
+S = 0100000000000000000001f4c8f927aed3ca752254
+Invalid =
+
+# r = p+4, x = 4 is incorrect. They should be compared modulo the order, not p,
+# so r >= p is never valid.
+Curve = secp160r1
+X = d8add22064027856c162243ab09ea96642975297
+Y = 8822a506712385ab3ebe5c61737c3bbb722b06b9
+Digest = 09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b
+R = 00ffffffffffffffffffffffffffffffff80000003
+S = 0100000000000000000001f4c8f927aed3ca752254
+Invalid =
diff --git a/src/include/openssl/ec.h b/src/include/openssl/ec.h
index 41a9c34c..966393ea 100644
--- a/src/include/openssl/ec.h
+++ b/src/include/openssl/ec.h
@@ -348,13 +348,15 @@ OPENSSL_EXPORT size_t EC_get_builtin_curves(EC_builtin_curve *out_curves,
 // EC_POINT_clear_free calls |EC_POINT_free|.
 OPENSSL_EXPORT void EC_POINT_clear_free(EC_POINT *point);
 
-// Old code expects to get EC_KEY from ec.h.
-#include <openssl/ec_key.h>
-
 
 #if defined(__cplusplus)
 }  // extern C
+#endif
 
+// Old code expects to get EC_KEY from ec.h.
+#include <openssl/ec_key.h>
+
+#if defined(__cplusplus)
 extern "C++" {
 
 BSSL_NAMESPACE_BEGIN
diff --git a/src/include/openssl/ssl.h b/src/include/openssl/ssl.h
index 0fa10b56..17153c20 100644
--- a/src/include/openssl/ssl.h
+++ b/src/include/openssl/ssl.h
@@ -1521,8 +1521,8 @@ OPENSSL_EXPORT int SSL_get_tls_unique(const SSL *ssl, uint8_t *out,
 // TLS 1.3 was negotiated. Otherwise, it returns zero.
 OPENSSL_EXPORT int SSL_get_extms_support(const SSL *ssl);
 
-// SSL_get_current_cipher returns the cipher used in the current outgoing
-// connection state, or NULL if the null cipher is active.
+// SSL_get_current_cipher returns cipher suite used by |ssl|, or NULL if it has
+// not been negotiated yet.
 OPENSSL_EXPORT const SSL_CIPHER *SSL_get_current_cipher(const SSL *ssl);
 
 // SSL_session_reused returns one if |ssl| performed an abbreviated handshake
diff --git a/src/ssl/handoff.cc b/src/ssl/handoff.cc
index a49d2894..4cca9818 100644
--- a/src/ssl/handoff.cc
+++ b/src/ssl/handoff.cc
@@ -37,6 +37,15 @@ static bool serialize_features(CBB *out) {
       return false;
     }
   }
+  CBB curves;
+  if (!CBB_add_asn1(out, &curves, CBS_ASN1_OCTETSTRING)) {
+    return false;
+  }
+  for (const NamedGroup& g : NamedGroups()) {
+    if (!CBB_add_u16(&curves, g.group_id)) {
+      return false;
+    }
+  }
   return CBB_flush(out);
 }
 
@@ -120,7 +129,52 @@ static bool apply_remote_features(SSL *ssl, CBS *in) {
   for (const SSL_CIPHER *unsupported_cipher : unsupported.get()) {
     ssl->config->cipher_list->Remove(unsupported_cipher);
   }
-  return sk_SSL_CIPHER_num(SSL_get_ciphers(ssl)) > 0;
+  if (sk_SSL_CIPHER_num(SSL_get_ciphers(ssl)) == 0) {
+    return false;
+  }
+
+  CBS curves;
+  if (!CBS_get_asn1(in, &curves, CBS_ASN1_OCTETSTRING)) {
+    return false;
+  }
+  Array<uint16_t> supported_curves;
+  if (!supported_curves.Init(CBS_len(&curves) / 2)) {
+    return false;
+  }
+  size_t idx = 0;
+  while (CBS_len(&curves)) {
+    uint16_t curve;
+    if (!CBS_get_u16(&curves, &curve)) {
+      return false;
+    }
+    supported_curves[idx++] = curve;
+  }
+  Span<const uint16_t> configured_curves =
+      tls1_get_grouplist(ssl->s3->hs.get());
+  Array<uint16_t> new_configured_curves;
+  if (!new_configured_curves.Init(configured_curves.size())) {
+    return false;
+  }
+  idx = 0;
+  for (uint16_t configured_curve : configured_curves) {
+    bool ok = false;
+    for (uint16_t supported_curve : supported_curves) {
+      if (supported_curve == configured_curve) {
+        ok = true;
+        break;
+      }
+    }
+    if (ok) {
+      new_configured_curves[idx++] = configured_curve;
+    }
+  }
+  if (idx == 0) {
+    return false;
+  }
+  new_configured_curves.Shrink(idx);
+  ssl->config->supported_group_list = std::move(new_configured_curves);
+
+  return true;
 }
 
 bool SSL_apply_handoff(SSL *ssl, Span<const uint8_t> handoff) {
diff --git a/src/ssl/internal.h b/src/ssl/internal.h
index 2e05ee03..fa86bda3 100644
--- a/src/ssl/internal.h
+++ b/src/ssl/internal.h
@@ -1000,6 +1000,15 @@ class SSLKeyShare {
   virtual bool Deserialize(CBS *in) { return false; }
 };
 
+struct NamedGroup {
+  int nid;
+  uint16_t group_id;
+  const char name[8], alias[11];
+};
+
+// NamedGroups returns all supported groups.
+Span<const NamedGroup> NamedGroups();
+
 // ssl_nid_to_group_id looks up the group corresponding to |nid|. On success, it
 // sets |*out_group_id| to the group ID and returns true. Otherwise, it returns
 // false.
@@ -1044,6 +1053,10 @@ bool tls_can_accept_handshake_data(const SSL *ssl, uint8_t *out_alert);
 // handshake data that has not been consumed by |get_message|.
 bool tls_has_unprocessed_handshake_data(const SSL *ssl);
 
+// tls_append_handshake_data appends |data| to the handshake buffer. It returns
+// true on success and false on allocation failure.
+bool tls_append_handshake_data(SSL *ssl, Span<const uint8_t> data);
+
 // dtls_has_unprocessed_handshake_data behaves like
 // |tls_has_unprocessed_handshake_data| for DTLS.
 bool dtls_has_unprocessed_handshake_data(const SSL *ssl);
diff --git a/src/ssl/s3_both.cc b/src/ssl/s3_both.cc
index 55e9aaa9..f835dc26 100644
--- a/src/ssl/s3_both.cc
+++ b/src/ssl/s3_both.cc
@@ -411,7 +411,7 @@ static ssl_open_record_t read_v2_client_hello(SSL *ssl, size_t *out_consumed,
   OPENSSL_memcpy(random + (SSL3_RANDOM_SIZE - rand_len), CBS_data(&challenge),
                  rand_len);
 
-  // Write out an equivalent TLS ClientHello.
+  // Write out an equivalent TLS ClientHello directly to the handshake buffer.
   size_t max_v3_client_hello = SSL3_HM_HEADER_LENGTH + 2 /* version */ +
                                SSL3_RANDOM_SIZE + 1 /* session ID length */ +
                                2 /* cipher list length */ +
@@ -419,7 +419,11 @@ static ssl_open_record_t read_v2_client_hello(SSL *ssl, size_t *out_consumed,
                                1 /* compression length */ + 1 /* compression */;
   ScopedCBB client_hello;
   CBB hello_body, cipher_suites;
-  if (!BUF_MEM_reserve(ssl->s3->hs_buf.get(), max_v3_client_hello) ||
+  if (!ssl->s3->hs_buf) {
+    ssl->s3->hs_buf.reset(BUF_MEM_new());
+  }
+  if (!ssl->s3->hs_buf ||
+      !BUF_MEM_reserve(ssl->s3->hs_buf.get(), max_v3_client_hello) ||
       !CBB_init_fixed(client_hello.get(), (uint8_t *)ssl->s3->hs_buf->data,
                       ssl->s3->hs_buf->max) ||
       !CBB_add_u8(client_hello.get(), SSL3_MT_CLIENT_HELLO) ||
@@ -539,18 +543,18 @@ bool tls_has_unprocessed_handshake_data(const SSL *ssl) {
   return ssl->s3->hs_buf && ssl->s3->hs_buf->length > msg_len;
 }
 
-ssl_open_record_t ssl3_open_handshake(SSL *ssl, size_t *out_consumed,
-                                      uint8_t *out_alert, Span<uint8_t> in) {
-  *out_consumed = 0;
+bool tls_append_handshake_data(SSL *ssl, Span<const uint8_t> data) {
   // Re-create the handshake buffer if needed.
   if (!ssl->s3->hs_buf) {
     ssl->s3->hs_buf.reset(BUF_MEM_new());
-    if (!ssl->s3->hs_buf) {
-      *out_alert = SSL_AD_INTERNAL_ERROR;
-      return ssl_open_record_error;
-    }
   }
+  return ssl->s3->hs_buf &&
+         BUF_MEM_append(ssl->s3->hs_buf.get(), data.data(), data.size());
+}
 
+ssl_open_record_t ssl3_open_handshake(SSL *ssl, size_t *out_consumed,
+                                      uint8_t *out_alert, Span<uint8_t> in) {
+  *out_consumed = 0;
   // Bypass the record layer for the first message to handle V2ClientHello.
   if (ssl->server && !ssl->s3->v2_hello_done) {
     // Ask for the first 5 bytes, the size of the TLS record header. This is
@@ -619,7 +623,7 @@ ssl_open_record_t ssl3_open_handshake(SSL *ssl, size_t *out_consumed,
   }
 
   // Append the entire handshake record to the buffer.
-  if (!BUF_MEM_append(ssl->s3->hs_buf.get(), body.data(), body.size())) {
+  if (!tls_append_handshake_data(ssl, body)) {
     *out_alert = SSL_AD_INTERNAL_ERROR;
     return ssl_open_record_error;
   }
diff --git a/src/ssl/s3_pkt.cc b/src/ssl/s3_pkt.cc
index e9b652e0..f0ae8a2e 100644
--- a/src/ssl/s3_pkt.cc
+++ b/src/ssl/s3_pkt.cc
@@ -318,11 +318,7 @@ ssl_open_record_t ssl3_open_app_data(SSL *ssl, Span<uint8_t> *out,
       return ssl_open_record_error;
     }
 
-    if (!ssl->s3->hs_buf) {
-      ssl->s3->hs_buf.reset(BUF_MEM_new());
-    }
-    if (!ssl->s3->hs_buf ||
-        !BUF_MEM_append(ssl->s3->hs_buf.get(), body.data(), body.size())) {
+    if (!tls_append_handshake_data(ssl, body)) {
       *out_alert = SSL_AD_INTERNAL_ERROR;
       return ssl_open_record_error;
     }
diff --git a/src/ssl/ssl_key_share.cc b/src/ssl/ssl_key_share.cc
index 8466eabb..80b7d0a0 100644
--- a/src/ssl/ssl_key_share.cc
+++ b/src/ssl/ssl_key_share.cc
@@ -211,11 +211,7 @@ class X25519KeyShare : public SSLKeyShare {
   uint8_t private_key_[32];
 };
 
-CONSTEXPR_ARRAY struct {
-  int nid;
-  uint16_t group_id;
-  const char name[8], alias[11];
-} kNamedGroups[] = {
+CONSTEXPR_ARRAY NamedGroup kNamedGroups[] = {
     {NID_secp224r1, SSL_CURVE_SECP224R1, "P-224", "secp224r1"},
     {NID_X9_62_prime256v1, SSL_CURVE_SECP256R1, "P-256", "prime256v1"},
     {NID_secp384r1, SSL_CURVE_SECP384R1, "P-384", "secp384r1"},
@@ -225,6 +221,10 @@ CONSTEXPR_ARRAY struct {
 
 }  // namespace
 
+Span<const NamedGroup> NamedGroups() {
+  return MakeConstSpan(kNamedGroups, OPENSSL_ARRAY_SIZE(kNamedGroups));
+}
+
 UniquePtr<SSLKeyShare> SSLKeyShare::Create(uint16_t group_id) {
   switch (group_id) {
     case SSL_CURVE_SECP224R1:
diff --git a/src/ssl/ssl_lib.cc b/src/ssl/ssl_lib.cc
index 5bd24426..8a888024 100644
--- a/src/ssl/ssl_lib.cc
+++ b/src/ssl/ssl_lib.cc
@@ -846,15 +846,7 @@ int SSL_provide_quic_data(SSL *ssl, enum ssl_encryption_level_t level,
     return 0;
   }
 
-  // Re-create the handshake buffer if needed.
-  if (!ssl->s3->hs_buf) {
-    ssl->s3->hs_buf.reset(BUF_MEM_new());
-    if (!ssl->s3->hs_buf) {
-      return 0;
-    }
-  }
-
-  return BUF_MEM_append(ssl->s3->hs_buf.get(), data, len);
+  return tls_append_handshake_data(ssl, MakeConstSpan(data, len));
 }
 
 int SSL_do_handshake(SSL *ssl) {
@@ -2278,7 +2270,8 @@ EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx) {
 }
 
 const SSL_CIPHER *SSL_get_current_cipher(const SSL *ssl) {
-  return ssl->s3->aead_write_ctx->cipher();
+  const SSL_SESSION *session = SSL_get_session(ssl);
+  return session == nullptr ? nullptr : session->cipher;
 }
 
 int SSL_session_reused(const SSL *ssl) {
diff --git a/src/ssl/ssl_test.cc b/src/ssl/ssl_test.cc
index f7b299ab..f945898d 100644
--- a/src/ssl/ssl_test.cc
+++ b/src/ssl/ssl_test.cc
@@ -4285,20 +4285,29 @@ TEST(SSLTest, ApplyHandoffRemovesUnsupportedCiphers) {
   // handoff is a handoff message that has been artificially modified to pretend
   // that only cipher 0x0A is supported.  When it is applied to |server|, all
   // ciphers but that one should be removed.
+  //
+  // To make a new one of these, try sticking this in the |Handoff| test above:
+  //
+  // hexdump(stderr, "", handoff.data(), handoff.size());
+  // sed -e 's/\(..\)/0x\1, /g'
+  //
+  // and modify serialize_features() to emit only cipher 0x0A.
+
   uint8_t handoff[] = {
-      0x30, 0x81, 0x8e, 0x02, 0x01, 0x00, 0x04, 0x00, 0x04, 0x81, 0x82, 0x01,
-      0x00, 0x00, 0x7e, 0x03, 0x03, 0x77, 0x62, 0x00, 0x9a, 0x13, 0x48, 0x23,
-      0x46, 0x11, 0x6c, 0x0b, 0x1c, 0x91, 0x4e, 0xbc, 0x1c, 0xff, 0x54, 0xb9,
-      0xe6, 0x3f, 0xa8, 0x8d, 0x49, 0x37, 0x7a, 0x9e, 0xbf, 0x36, 0xd5, 0x08,
-      0x24, 0x00, 0x00, 0x1e, 0xc0, 0x2b, 0xc0, 0x2f, 0xc0, 0x2c, 0xc0, 0x30,
+      0x30, 0x81, 0x9a, 0x02, 0x01, 0x00, 0x04, 0x00, 0x04, 0x81, 0x82, 0x01,
+      0x00, 0x00, 0x7e, 0x03, 0x03, 0x30, 0x8e, 0x8f, 0x79, 0xd2, 0x87, 0x39,
+      0xc2, 0x23, 0x23, 0x13, 0xca, 0x3c, 0x80, 0x44, 0xfd, 0x80, 0x83, 0x62,
+      0x3c, 0xcc, 0xf8, 0x76, 0xd3, 0x62, 0xbb, 0x54, 0xe3, 0xc4, 0x39, 0x24,
+      0xa5, 0x00, 0x00, 0x1e, 0xc0, 0x2b, 0xc0, 0x2f, 0xc0, 0x2c, 0xc0, 0x30,
       0xcc, 0xa9, 0xcc, 0xa8, 0xc0, 0x09, 0xc0, 0x13, 0xc0, 0x0a, 0xc0, 0x14,
       0x00, 0x9c, 0x00, 0x9d, 0x00, 0x2f, 0x00, 0x35, 0x00, 0x0a, 0x01, 0x00,
-      0x00, 0x37, 0xff, 0x01, 0x00, 0x01, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
-      0x23, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x14, 0x00, 0x12, 0x04, 0x03, 0x08,
-      0x04, 0x04, 0x01, 0x05, 0x03, 0x08, 0x05, 0x05, 0x01, 0x08, 0x06, 0x06,
-      0x01, 0x02, 0x01, 0x00, 0x0b, 0x00, 0x02, 0x01, 0x00, 0x00, 0x0a, 0x00,
-      0x08, 0x00, 0x06, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x18, 0x04, 0x02, 0x00,
-      0x0a,
+      0x00, 0x37, 0x00, 0x17, 0x00, 0x00, 0xff, 0x01, 0x00, 0x01, 0x00, 0x00,
+      0x0a, 0x00, 0x08, 0x00, 0x06, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x18, 0x00,
+      0x0b, 0x00, 0x02, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x0d, 0x00,
+      0x14, 0x00, 0x12, 0x04, 0x03, 0x08, 0x04, 0x04, 0x01, 0x05, 0x03, 0x08,
+      0x05, 0x05, 0x01, 0x08, 0x06, 0x06, 0x01, 0x02, 0x01, 0x04, 0x02, 0x00,
+      0x0a, 0x04, 0x0a, 0x00, 0x15, 0x00, 0x17, 0x00, 0x18, 0x00, 0x19, 0x00,
+      0x1d,
   };
 
   EXPECT_EQ(20u, sk_SSL_CIPHER_num(SSL_get_ciphers(server.get())));
@@ -4307,6 +4316,43 @@ TEST(SSLTest, ApplyHandoffRemovesUnsupportedCiphers) {
   EXPECT_EQ(1u, sk_SSL_CIPHER_num(SSL_get_ciphers(server.get())));
 }
 
+TEST(SSLTest, ApplyHandoffRemovesUnsupportedCurves) {
+  bssl::UniquePtr<SSL_CTX> server_ctx(SSL_CTX_new(TLS_method()));
+  bssl::UniquePtr<SSL> server(SSL_new(server_ctx.get()));
+
+  // handoff is a handoff message that has been artificially modified to pretend
+  // that only one curve is supported.  When it is applied to |server|, all
+  // curves but that one should be removed.
+  //
+  // See |ApplyHandoffRemovesUnsupportedCiphers| for how to make a new one of
+  // these.
+  uint8_t handoff[] = {
+      0x30, 0x81, 0xc0, 0x02, 0x01, 0x00, 0x04, 0x00, 0x04, 0x81, 0x82, 0x01,
+      0x00, 0x00, 0x7e, 0x03, 0x03, 0x98, 0x30, 0xce, 0xd9, 0xb0, 0xdf, 0x5f,
+      0x82, 0x05, 0x4a, 0x43, 0x67, 0x7e, 0xdb, 0x6a, 0x4f, 0x21, 0x18, 0x4e,
+      0x0d, 0x94, 0x63, 0x18, 0x8b, 0x54, 0x89, 0xdb, 0x8b, 0x1d, 0x84, 0xbc,
+      0x09, 0x00, 0x00, 0x1e, 0xc0, 0x2b, 0xc0, 0x2f, 0xc0, 0x2c, 0xc0, 0x30,
+      0xcc, 0xa9, 0xcc, 0xa8, 0xc0, 0x09, 0xc0, 0x13, 0xc0, 0x0a, 0xc0, 0x14,
+      0x00, 0x9c, 0x00, 0x9d, 0x00, 0x2f, 0x00, 0x35, 0x00, 0x0a, 0x01, 0x00,
+      0x00, 0x37, 0x00, 0x17, 0x00, 0x00, 0xff, 0x01, 0x00, 0x01, 0x00, 0x00,
+      0x0a, 0x00, 0x08, 0x00, 0x06, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x18, 0x00,
+      0x0b, 0x00, 0x02, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x0d, 0x00,
+      0x14, 0x00, 0x12, 0x04, 0x03, 0x08, 0x04, 0x04, 0x01, 0x05, 0x03, 0x08,
+      0x05, 0x05, 0x01, 0x08, 0x06, 0x06, 0x01, 0x02, 0x01, 0x04, 0x30, 0x00,
+      0x02, 0x00, 0x0a, 0x00, 0x2f, 0x00, 0x35, 0x00, 0x8c, 0x00, 0x8d, 0x00,
+      0x9c, 0x00, 0x9d, 0x13, 0x01, 0x13, 0x02, 0x13, 0x03, 0xc0, 0x09, 0xc0,
+      0x0a, 0xc0, 0x13, 0xc0, 0x14, 0xc0, 0x2b, 0xc0, 0x2c, 0xc0, 0x2f, 0xc0,
+      0x30, 0xc0, 0x35, 0xc0, 0x36, 0xcc, 0xa8, 0xcc, 0xa9, 0xcc, 0xac, 0x04,
+      0x02, 0x00, 0x17,
+  };
+
+  // The zero length means that the default list of groups is used.
+  EXPECT_EQ(0u, server->config->supported_group_list.size());
+  ASSERT_TRUE(
+      SSL_apply_handoff(server.get(), {handoff, OPENSSL_ARRAY_SIZE(handoff)}));
+  EXPECT_EQ(1u, server->config->supported_group_list.size());
+}
+
 TEST_P(SSLVersionTest, VerifyBeforeCertRequest) {
   // Configure the server to request client certificates.
   SSL_CTX_set_custom_verify(
@@ -4498,7 +4544,8 @@ class MockQUICTransport {
 
   bool PeerSecretsMatch(ssl_encryption_level_t level) const {
     return levels_[level].write_secret == peer_->levels_[level].read_secret &&
-           levels_[level].read_secret == peer_->levels_[level].write_secret;
+           levels_[level].read_secret == peer_->levels_[level].write_secret &&
+           levels_[level].cipher == peer_->levels_[level].cipher;
   }
 
   bool HasSecrets(ssl_encryption_level_t level) const {
@@ -4508,11 +4555,18 @@ class MockQUICTransport {
 
   bool SetEncryptionSecrets(ssl_encryption_level_t level,
                             const uint8_t *read_secret,
-                            const uint8_t *write_secret, size_t secret_len) {
+                            const uint8_t *write_secret, size_t secret_len,
+                            const SSL_CIPHER *cipher) {
     if (HasSecrets(level)) {
       ADD_FAILURE() << "duplicate keys configured";
       return false;
     }
+
+    if (cipher == nullptr) {
+      ADD_FAILURE() << "current cipher unavailable";
+      return false;
+    }
+
     if (level != ssl_encryption_early_data &&
         (read_secret == nullptr || write_secret == nullptr)) {
       ADD_FAILURE() << "key was unexpectedly null";
@@ -4525,6 +4579,7 @@ class MockQUICTransport {
       levels_[level].write_secret.assign(write_secret,
                                          write_secret + secret_len);
     }
+    levels_[level].cipher = SSL_CIPHER_get_id(cipher);
     return true;
   }
 
@@ -4572,6 +4627,10 @@ class MockQUICTransport {
       ADD_FAILURE() << "peer write key does not match read key";
       return false;
     }
+    if (peer_->levels_[level].cipher != levels_[level].cipher) {
+      ADD_FAILURE() << "peer cipher does not match";
+      return false;
+    }
     std::vector<uint8_t> *peer_data = &peer_->levels_[level].write_data;
     num = std::min(num, peer_data->size());
     out->assign(peer_data->begin(), peer_data->begin() + num);
@@ -4590,6 +4649,7 @@ class MockQUICTransport {
     std::vector<uint8_t> write_data;
     std::vector<uint8_t> write_secret;
     std::vector<uint8_t> read_secret;
+    uint32_t cipher = 0;
   };
   Level levels_[kNumQUICLevels];
 };
@@ -4675,8 +4735,8 @@ class QUICMethodTest : public testing::Test {
                                           const uint8_t *read_key,
                                           const uint8_t *write_key,
                                           size_t key_len) {
-    return TransportFromSSL(ssl)->SetEncryptionSecrets(level, read_key,
-                                                       write_key, key_len);
+    return TransportFromSSL(ssl)->SetEncryptionSecrets(
+        level, read_key, write_key, key_len, SSL_get_current_cipher(ssl));
   }
 
   static int AddHandshakeDataCallback(SSL *ssl,
diff --git a/src/ssl/test/test_config.cc b/src/ssl/test/test_config.cc
index ef24ca00..52e6cf76 100644
--- a/src/ssl/test/test_config.cc
+++ b/src/ssl/test/test_config.cc
@@ -1638,10 +1638,5 @@ bssl::UniquePtr<SSL> TestConfig::NewSSL(
     }
   }
 
-  if (SSL_get_current_cipher(ssl.get()) != nullptr) {
-    fprintf(stderr, "non-null cipher before handshake\n");
-    return nullptr;
-  }
-
   return ssl;
 }
diff --git a/src/third_party/fiat/p256.c b/src/third_party/fiat/p256.c
index f42f8fe2..c8e42a31 100644
--- a/src/third_party/fiat/p256.c
+++ b/src/third_party/fiat/p256.c
@@ -895,14 +895,6 @@ static void fe_from_montgomery(fe x) {
   fe_mul(x, x, kOne);
 }
 
-// BN_* compatability wrappers
-
-static BIGNUM *fe_to_BN(BIGNUM *out, const fe in) {
-  uint8_t tmp[NBYTES];
-  fe_tobytes(tmp, in);
-  return BN_le2bn(tmp, NBYTES, out);
-}
-
 static void fe_from_generic(fe out, const EC_FELEM *in) {
   fe_frombytes(out, in->bytes);
 }
@@ -1631,8 +1623,8 @@ static void batch_mul(fe x_out, fe y_out, fe z_out,
 // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
 // (X/Z^2, Y/Z^3).
 static int ec_GFp_nistp256_point_get_affine_coordinates(
-    const EC_GROUP *group, const EC_RAW_POINT *point, BIGNUM *x_out,
-    BIGNUM *y_out) {
+    const EC_GROUP *group, const EC_RAW_POINT *point, EC_FELEM *x_out,
+    EC_FELEM *y_out) {
   if (ec_GFp_simple_is_at_infinity(group, point)) {
     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
     return 0;
@@ -1652,10 +1644,7 @@ static int ec_GFp_nistp256_point_get_affine_coordinates(
     fe x;
     fe_from_generic(x, &point->X);
     fe_mul(x, x, z1);
-    if (!fe_to_BN(x_out, x)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-      return 0;
-    }
+    fe_to_generic(x_out, x);
   }
 
   if (y_out != NULL) {
@@ -1663,15 +1652,39 @@ static int ec_GFp_nistp256_point_get_affine_coordinates(
     fe_from_generic(y, &point->Y);
     fe_mul(z1, z1, z2);
     fe_mul(y, y, z1);
-    if (!fe_to_BN(y_out, y)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-      return 0;
-    }
+    fe_to_generic(y_out, y);
   }
 
   return 1;
 }
 
+static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+  fe x1, y1, z1, x2, y2, z2;
+  fe_from_generic(x1, &a->X);
+  fe_from_generic(y1, &a->Y);
+  fe_from_generic(z1, &a->Z);
+  fe_from_generic(x2, &b->X);
+  fe_from_generic(y2, &b->Y);
+  fe_from_generic(z2, &b->Z);
+  point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
+  fe_to_generic(&r->X, x1);
+  fe_to_generic(&r->Y, y1);
+  fe_to_generic(&r->Z, z1);
+}
+
+static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_RAW_POINT *a) {
+  fe x, y, z;
+  fe_from_generic(x, &a->X);
+  fe_from_generic(y, &a->Y);
+  fe_from_generic(z, &a->Z);
+  point_double(x, y, z, x, y, z);
+  fe_to_generic(&r->X, x);
+  fe_to_generic(&r->Y, y);
+  fe_to_generic(&r->Z, z);
+}
+
 static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                                        const EC_SCALAR *g_scalar,
                                        const EC_RAW_POINT *p,
@@ -1800,6 +1813,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
   out->group_set_curve = ec_GFp_mont_group_set_curve;
   out->point_get_affine_coordinates =
     ec_GFp_nistp256_point_get_affine_coordinates;
+  out->add = ec_GFp_nistp256_add;
+  out->dbl = ec_GFp_nistp256_dbl;
   out->mul = ec_GFp_nistp256_points_mul;
   out->mul_public = ec_GFp_nistp256_point_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
@@ -1807,6 +1822,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
   out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
   out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
   out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+  out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+  out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
 };
 
 #undef BORINGSSL_NISTP256_64BIT
author	Robert Sloan <varomodt@google.com>	2018-11-12 13:38:50 -0800
committer	Robert Sloan <varomodt@google.com>	2018-11-12 13:51:26 -0800
commit	a51059f202525842fc0d628a408ad5a5e33a54e7 (patch)
tree	5b28ee9547e63d78299aaedf791cc293a8518f07 /src
parent	cbf5ea62f9677735fb503a0a23ab3ee8c15ef40e (diff)
download	boringssl-a51059f202525842fc0d628a408ad5a5e33a54e7.tar.gz