diff options
author | Robert Sloan <varomodt@google.com> | 2018-02-05 09:07:34 -0800 |
---|---|---|
committer | Robert Sloan <varomodt@google.com> | 2018-02-05 09:07:39 -0800 |
commit | 8542c08a00c332af2ebca2a0c64b8d4d5fbd4cd2 (patch) | |
tree | 65345a0acda3104c65b39662f207fbc9239e9ad5 /src/crypto | |
parent | 309a31e32558286a3b92c754bd3051b962527c25 (diff) | |
download | boringssl-8542c08a00c332af2ebca2a0c64b8d4d5fbd4cd2.tar.gz |
external/boringssl: Sync to 45210dd4e21ace9d28cb76b3f83303fcdd2efcce.
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/a62dbf88d8a3c04446db833a1eb80a620cb1514d..45210dd4e21ace9d28cb76b3f83303fcdd2efcce
Test: BoringSSL CTS Presubmits.
Change-Id: I2f3cc22fb906078f64bc2af020fa14c3d0875c81
Diffstat (limited to 'src/crypto')
22 files changed, 689 insertions, 2510 deletions
diff --git a/src/crypto/curve25519/CMakeLists.txt b/src/crypto/curve25519/CMakeLists.txt index 6f51d54f..4894fa8e 100644 --- a/src/crypto/curve25519/CMakeLists.txt +++ b/src/crypto/curve25519/CMakeLists.txt @@ -8,21 +8,12 @@ if (${ARCH} STREQUAL "arm") ) endif() -if (${ARCH} STREQUAL "x86_64") - set( - CURVE25519_ARCH_SOURCES - - asm/x25519-asm-x86_64.S - ) -endif() - add_library( curve25519 OBJECT spake25519.c - x25519-x86_64.c ${CURVE25519_ARCH_SOURCES} ) diff --git a/src/crypto/curve25519/asm/x25519-asm-x86_64.S b/src/crypto/curve25519/asm/x25519-asm-x86_64.S deleted file mode 100644 index 6cff53ee..00000000 --- a/src/crypto/curve25519/asm/x25519-asm-x86_64.S +++ /dev/null @@ -1,1894 +0,0 @@ -/* Copyright (c) 2015, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -/* This file is adapted from crypto_scalarmult/curve25519/amd64-51/ in - * SUPERCOP 20141124 (http://bench.cr.yp.to/supercop.html). That code is public - * domain licensed but the standard ISC license is included above to keep - * licensing simple. */ - -#if !defined(OPENSSL_NO_ASM) -#if defined(__x86_64__) - -.data -.p2align 4 - -#if defined(__APPLE__) -/* OS X's C ABI prefixes functions with underscore. */ -#define C_ABI(x) _ ## x -#define HIDDEN .private_extern -#else -#define C_ABI(x) x -#define HIDDEN .hidden -#endif - -x25519_x86_64_REDMASK51: .quad 0x0007FFFFFFFFFFFF -x25519_x86_64_121666_213: .quad 996687872 -x25519_x86_64_2P0: .quad 0xFFFFFFFFFFFDA -x25519_x86_64_2P1234: .quad 0xFFFFFFFFFFFFE -x25519_x86_64_4P0: .quad 0x1FFFFFFFFFFFB4 -x25519_x86_64_4P1234: .quad 0x1FFFFFFFFFFFFC -x25519_x86_64_MU0: .quad 0xED9CE5A30A2C131B -x25519_x86_64_MU1: .quad 0x2106215D086329A7 -x25519_x86_64_MU2: .quad 0xFFFFFFFFFFFFFFEB -x25519_x86_64_MU3: .quad 0xFFFFFFFFFFFFFFFF -x25519_x86_64_MU4: .quad 0x000000000000000F -x25519_x86_64_ORDER0: .quad 0x5812631A5CF5D3ED -x25519_x86_64_ORDER1: .quad 0x14DEF9DEA2F79CD6 -x25519_x86_64_ORDER2: .quad 0x0000000000000000 -x25519_x86_64_ORDER3: .quad 0x1000000000000000 -x25519_x86_64_EC2D0: .quad 1859910466990425 -x25519_x86_64_EC2D1: .quad 932731440258426 -x25519_x86_64_EC2D2: .quad 1072319116312658 -x25519_x86_64_EC2D3: .quad 1815898335770999 -x25519_x86_64_EC2D4: .quad 633789495995903 -x25519_x86_64__38: .quad 38 - -.text -.p2align 5 - -.globl C_ABI(x25519_x86_64_freeze) -HIDDEN C_ABI(x25519_x86_64_freeze) -C_ABI(x25519_x86_64_freeze): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq 0(%rdi),%rsi -movq 8(%rdi),%rdx -movq 16(%rdi),%rcx -movq 24(%rdi),%r8 -movq 32(%rdi),%r9 -movq x25519_x86_64_REDMASK51(%rip),%rax -mov %rax,%r10 -sub $18,%r10 -mov $3,%r11 -._reduceloop: -mov %rsi,%r12 -shr $51,%r12 -and %rax,%rsi -add %r12,%rdx -mov %rdx,%r12 -shr $51,%r12 -and %rax,%rdx -add %r12,%rcx -mov %rcx,%r12 -shr $51,%r12 -and %rax,%rcx -add %r12,%r8 -mov %r8,%r12 -shr $51,%r12 -and %rax,%r8 -add %r12,%r9 -mov %r9,%r12 -shr $51,%r12 -and %rax,%r9 -imulq $19,%r12,%r12 -add %r12,%rsi -sub $1,%r11 -ja ._reduceloop -mov $1,%r12 -cmp %r10,%rsi -cmovl %r11,%r12 -cmp %rax,%rdx -cmovne %r11,%r12 -cmp %rax,%rcx -cmovne %r11,%r12 -cmp %rax,%r8 -cmovne %r11,%r12 -cmp %rax,%r9 -cmovne %r11,%r12 -neg %r12 -and %r12,%rax -and %r12,%r10 -sub %r10,%rsi -sub %rax,%rdx -sub %rax,%rcx -sub %rax,%r8 -sub %rax,%r9 -movq %rsi,0(%rdi) -movq %rdx,8(%rdi) -movq %rcx,16(%rdi) -movq %r8,24(%rdi) -movq %r9,32(%rdi) -movq -8(%rsp),%r12 -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_mul) -HIDDEN C_ABI(x25519_x86_64_mul) -C_ABI(x25519_x86_64_mul): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq %rbp,-48(%rsp) -.cfi_rel_offset rbp, -48 -mov %rdx,%rcx -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-64(%rsp) -mulq 16(%rcx) -mov %rax,%r8 -mov %rdx,%r9 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-72(%rsp) -mulq 8(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 0(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 8(%rcx) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsi),%rax -mulq 16(%rcx) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsi),%rax -mulq 24(%rcx) -mov %rax,%r14 -mov %rdx,%r15 -movq 0(%rsi),%rax -mulq 32(%rcx) -mov %rax,%rbx -mov %rdx,%rbp -movq 8(%rsi),%rax -mulq 0(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsi),%rax -mulq 8(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsi),%rax -mulq 16(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsi),%rax -mulq 24(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 8(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rax -mulq 0(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 16(%rsi),%rax -mulq 8(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsi),%rax -mulq 16(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 24(%rsi),%rax -mulq 0(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 24(%rsi),%rax -mulq 8(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -64(%rsp),%rax -mulq 24(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -64(%rsp),%rax -mulq 32(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 32(%rsi),%rax -mulq 0(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -72(%rsp),%rax -mulq 16(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -72(%rsp),%rax -mulq 24(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq -72(%rsp),%rax -mulq 32(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%r8,%r9 -and %rsi,%r8 -shld $13,%r10,%r11 -and %rsi,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rsi,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rsi,%r14 -add %r13,%r14 -shld $13,%rbx,%rbp -and %rsi,%rbx -add %r15,%rbx -imulq $19,%rbp,%rdx -add %rdx,%r8 -mov %r8,%rdx -shr $51,%rdx -add %r10,%rdx -mov %rdx,%rcx -shr $51,%rdx -and %rsi,%r8 -add %r12,%rdx -mov %rdx,%r9 -shr $51,%rdx -and %rsi,%rcx -add %r14,%rdx -mov %rdx,%rax -shr $51,%rdx -and %rsi,%r9 -add %rbx,%rdx -mov %rdx,%r10 -shr $51,%rdx -and %rsi,%rax -imulq $19,%rdx,%rdx -add %rdx,%r8 -and %rsi,%r10 -movq %r8,0(%rdi) -movq %rcx,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -movq -48(%rsp),%rbp -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_square) -HIDDEN C_ABI(x25519_x86_64_square) -C_ABI(x25519_x86_64_square): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq 0(%rsi),%rax -mulq 0(%rsi) -mov %rax,%rcx -mov %rdx,%r8 -movq 0(%rsi),%rax -shl $1,%rax -mulq 8(%rsi) -mov %rax,%r9 -mov %rdx,%r10 -movq 0(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -mov %rax,%r11 -mov %rdx,%r12 -movq 0(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -mov %rax,%r13 -mov %rdx,%r14 -movq 0(%rsi),%rax -shl $1,%rax -mulq 32(%rsi) -mov %rax,%r15 -mov %rdx,%rbx -movq 8(%rsi),%rax -mulq 8(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 8(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq 8(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 8(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rax -mulq 16(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%rcx,%r8 -and %rsi,%rcx -shld $13,%r9,%r10 -and %rsi,%r9 -add %r8,%r9 -shld $13,%r11,%r12 -and %rsi,%r11 -add %r10,%r11 -shld $13,%r13,%r14 -and %rsi,%r13 -add %r12,%r13 -shld $13,%r15,%rbx -and %rsi,%r15 -add %r14,%r15 -imulq $19,%rbx,%rdx -add %rdx,%rcx -mov %rcx,%rdx -shr $51,%rdx -add %r9,%rdx -and %rsi,%rcx -mov %rdx,%r8 -shr $51,%rdx -add %r11,%rdx -and %rsi,%r8 -mov %rdx,%r9 -shr $51,%rdx -add %r13,%rdx -and %rsi,%r9 -mov %rdx,%rax -shr $51,%rdx -add %r15,%rdx -and %rsi,%rax -mov %rdx,%r10 -shr $51,%rdx -imulq $19,%rdx,%rdx -add %rdx,%rcx -and %rsi,%r10 -movq %rcx,0(%rdi) -movq %r8,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_ladderstep) -HIDDEN C_ABI(x25519_x86_64_ladderstep) -C_ABI(x25519_x86_64_ladderstep): -.cfi_startproc -sub $344,%rsp -.cfi_adjust_cfa_offset 344 -movq %r12,296(%rsp) -.cfi_rel_offset r12, 296 -movq %r13,304(%rsp) -.cfi_rel_offset r13, 304 -movq %r14,312(%rsp) -.cfi_rel_offset r14, 312 -movq %r15,320(%rsp) -.cfi_rel_offset r15, 320 -movq %rbx,328(%rsp) -.cfi_rel_offset rbx, 328 -movq %rbp,336(%rsp) -.cfi_rel_offset rbp, 336 -movq 40(%rdi),%rsi -movq 48(%rdi),%rdx -movq 56(%rdi),%rcx -movq 64(%rdi),%r8 -movq 72(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 80(%rdi),%rsi -addq 88(%rdi),%rdx -addq 96(%rdi),%rcx -addq 104(%rdi),%r8 -addq 112(%rdi),%r9 -subq 80(%rdi),%rax -subq 88(%rdi),%r10 -subq 96(%rdi),%r11 -subq 104(%rdi),%r12 -subq 112(%rdi),%r13 -movq %rsi,0(%rsp) -movq %rdx,8(%rsp) -movq %rcx,16(%rsp) -movq %r8,24(%rsp) -movq %r9,32(%rsp) -movq %rax,40(%rsp) -movq %r10,48(%rsp) -movq %r11,56(%rsp) -movq %r12,64(%rsp) -movq %r13,72(%rsp) -movq 40(%rsp),%rax -mulq 40(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 40(%rsp),%rax -shl $1,%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 40(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 40(%rsp),%rax -shl $1,%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 40(%rsp),%rax -shl $1,%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 48(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 48(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 48(%rsp),%rax -shl $1,%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 48(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 72(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rsp) -movq %r8,88(%rsp) -movq %r9,96(%rsp) -movq %rax,104(%rsp) -movq %r10,112(%rsp) -movq 0(%rsp),%rax -mulq 0(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 0(%rsp),%rax -shl $1,%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 0(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsp),%rax -shl $1,%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 8(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 32(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rsp) -movq %r8,128(%rsp) -movq %r9,136(%rsp) -movq %rax,144(%rsp) -movq %r10,152(%rsp) -mov %rsi,%rsi -mov %r8,%rdx -mov %r9,%rcx -mov %rax,%r8 -mov %r10,%r9 -add x25519_x86_64_2P0(%rip),%rsi -add x25519_x86_64_2P1234(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r8 -add x25519_x86_64_2P1234(%rip),%r9 -subq 80(%rsp),%rsi -subq 88(%rsp),%rdx -subq 96(%rsp),%rcx -subq 104(%rsp),%r8 -subq 112(%rsp),%r9 -movq %rsi,160(%rsp) -movq %rdx,168(%rsp) -movq %rcx,176(%rsp) -movq %r8,184(%rsp) -movq %r9,192(%rsp) -movq 120(%rdi),%rsi -movq 128(%rdi),%rdx -movq 136(%rdi),%rcx -movq 144(%rdi),%r8 -movq 152(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 160(%rdi),%rsi -addq 168(%rdi),%rdx -addq 176(%rdi),%rcx -addq 184(%rdi),%r8 -addq 192(%rdi),%r9 -subq 160(%rdi),%rax -subq 168(%rdi),%r10 -subq 176(%rdi),%r11 -subq 184(%rdi),%r12 -subq 192(%rdi),%r13 -movq %rsi,200(%rsp) -movq %rdx,208(%rsp) -movq %rcx,216(%rsp) -movq %r8,224(%rsp) -movq %r9,232(%rsp) -movq %rax,240(%rsp) -movq %r10,248(%rsp) -movq %r11,256(%rsp) -movq %r12,264(%rsp) -movq %r13,272(%rsp) -movq 224(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,280(%rsp) -mulq 56(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 232(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,288(%rsp) -mulq 48(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 40(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 200(%rsp),%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 200(%rsp),%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 200(%rsp),%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 208(%rsp),%rax -mulq 40(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 208(%rsp),%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rax -mulq 40(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 216(%rsp),%rax -mulq 48(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 216(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 224(%rsp),%rax -mulq 40(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 224(%rsp),%rax -mulq 48(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 280(%rsp),%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 280(%rsp),%rax -mulq 72(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 232(%rsp),%rax -mulq 40(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 288(%rsp),%rax -mulq 56(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 288(%rsp),%rax -mulq 64(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 288(%rsp),%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rsp) -movq %r8,48(%rsp) -movq %r9,56(%rsp) -movq %rax,64(%rsp) -movq %r10,72(%rsp) -movq 264(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,200(%rsp) -mulq 16(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 272(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,208(%rsp) -mulq 8(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 0(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 240(%rsp),%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 240(%rsp),%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 240(%rsp),%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 248(%rsp),%rax -mulq 0(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 248(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 248(%rsp),%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 248(%rsp),%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 248(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rax -mulq 0(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 256(%rsp),%rax -mulq 8(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 256(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 264(%rsp),%rax -mulq 0(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 264(%rsp),%rax -mulq 8(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 200(%rsp),%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 200(%rsp),%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 272(%rsp),%rax -mulq 0(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rax -mulq 16(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 24(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -mov %rsi,%rdx -mov %r8,%rcx -mov %r9,%r11 -mov %rax,%r12 -mov %r10,%r13 -add x25519_x86_64_2P0(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 40(%rsp),%rsi -addq 48(%rsp),%r8 -addq 56(%rsp),%r9 -addq 64(%rsp),%rax -addq 72(%rsp),%r10 -subq 40(%rsp),%rdx -subq 48(%rsp),%rcx -subq 56(%rsp),%r11 -subq 64(%rsp),%r12 -subq 72(%rsp),%r13 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq %rdx,160(%rdi) -movq %rcx,168(%rdi) -movq %r11,176(%rdi) -movq %r12,184(%rdi) -movq %r13,192(%rdi) -movq 120(%rdi),%rax -mulq 120(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 120(%rdi),%rax -shl $1,%rax -mulq 128(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rdi),%rax -shl $1,%rax -mulq 152(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rdi),%rax -mulq 128(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rax -mulq 136(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 144(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 144(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 152(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq 160(%rdi),%rax -mulq 160(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 160(%rdi),%rax -shl $1,%rax -mulq 168(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -shl $1,%rax -mulq 192(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 168(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 176(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 184(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 184(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 16(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 8(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 0(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 8(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -mulq 16(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -mulq 24(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -mulq 32(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 0(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 168(%rdi),%rax -mulq 8(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -mulq 16(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -mulq 24(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 0(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 176(%rdi),%rax -mulq 8(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 176(%rdi),%rax -mulq 16(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rax -mulq 0(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 184(%rdi),%rax -mulq 8(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 24(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 32(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rax -mulq 0(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 16(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 24(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 32(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 144(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 96(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 152(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 88(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 80(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 88(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rsp),%rax -mulq 96(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rsp),%rax -mulq 104(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rsp),%rax -mulq 112(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rsp),%rax -mulq 80(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 128(%rsp),%rax -mulq 88(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rsp),%rax -mulq 96(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rsp),%rax -mulq 104(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rax -mulq 80(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 136(%rsp),%rax -mulq 88(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 136(%rsp),%rax -mulq 96(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 104(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rsp),%rax -mulq 80(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 144(%rsp),%rax -mulq 88(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 104(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 112(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rsp),%rax -mulq 80(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 96(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 104(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 112(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rdi) -movq %r8,48(%rdi) -movq %r9,56(%rdi) -movq %rax,64(%rdi) -movq %r10,72(%rdi) -movq 160(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -mov %rax,%rsi -mov %rdx,%rcx -movq 168(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%rcx -mov %rdx,%r8 -movq 176(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r8 -mov %rdx,%r9 -movq 184(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r9 -mov %rdx,%r10 -movq 192(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r10 -imulq $19,%rdx,%rdx -add %rdx,%rsi -addq 80(%rsp),%rsi -addq 88(%rsp),%rcx -addq 96(%rsp),%r8 -addq 104(%rsp),%r9 -addq 112(%rsp),%r10 -movq %rsi,80(%rdi) -movq %rcx,88(%rdi) -movq %r8,96(%rdi) -movq %r9,104(%rdi) -movq %r10,112(%rdi) -movq 104(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 176(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 112(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 168(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 160(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 168(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 80(%rdi),%rax -mulq 176(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 80(%rdi),%rax -mulq 184(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 80(%rdi),%rax -mulq 192(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 88(%rdi),%rax -mulq 160(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 88(%rdi),%rax -mulq 168(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 88(%rdi),%rax -mulq 176(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 88(%rdi),%rax -mulq 184(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 88(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rax -mulq 160(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 96(%rdi),%rax -mulq 168(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 96(%rdi),%rax -mulq 176(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 104(%rdi),%rax -mulq 160(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 104(%rdi),%rax -mulq 168(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 184(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 192(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 112(%rdi),%rax -mulq 160(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 176(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 184(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 192(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rdi) -movq %r8,88(%rdi) -movq %r9,96(%rdi) -movq %rax,104(%rdi) -movq %r10,112(%rdi) -movq 296(%rsp),%r12 -movq 304(%rsp),%r13 -movq 312(%rsp),%r14 -movq 320(%rsp),%r15 -movq 328(%rsp),%rbx -movq 336(%rsp),%rbp -add $344,%rsp -.cfi_adjust_cfa_offset -344 -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_work_cswap) -HIDDEN C_ABI(x25519_x86_64_work_cswap) -C_ABI(x25519_x86_64_work_cswap): -.cfi_startproc -subq $1,%rsi -notq %rsi -movq %rsi,%xmm15 -pshufd $0x44,%xmm15,%xmm15 -movdqu 0(%rdi),%xmm0 -movdqu 16(%rdi),%xmm2 -movdqu 32(%rdi),%xmm4 -movdqu 48(%rdi),%xmm6 -movdqu 64(%rdi),%xmm8 -movdqu 80(%rdi),%xmm1 -movdqu 96(%rdi),%xmm3 -movdqu 112(%rdi),%xmm5 -movdqu 128(%rdi),%xmm7 -movdqu 144(%rdi),%xmm9 -movdqa %xmm1,%xmm10 -movdqa %xmm3,%xmm11 -movdqa %xmm5,%xmm12 -movdqa %xmm7,%xmm13 -movdqa %xmm9,%xmm14 -pxor %xmm0,%xmm10 -pxor %xmm2,%xmm11 -pxor %xmm4,%xmm12 -pxor %xmm6,%xmm13 -pxor %xmm8,%xmm14 -pand %xmm15,%xmm10 -pand %xmm15,%xmm11 -pand %xmm15,%xmm12 -pand %xmm15,%xmm13 -pand %xmm15,%xmm14 -pxor %xmm10,%xmm0 -pxor %xmm10,%xmm1 -pxor %xmm11,%xmm2 -pxor %xmm11,%xmm3 -pxor %xmm12,%xmm4 -pxor %xmm12,%xmm5 -pxor %xmm13,%xmm6 -pxor %xmm13,%xmm7 -pxor %xmm14,%xmm8 -pxor %xmm14,%xmm9 -movdqu %xmm0,0(%rdi) -movdqu %xmm2,16(%rdi) -movdqu %xmm4,32(%rdi) -movdqu %xmm6,48(%rdi) -movdqu %xmm8,64(%rdi) -movdqu %xmm1,80(%rdi) -movdqu %xmm3,96(%rdi) -movdqu %xmm5,112(%rdi) -movdqu %xmm7,128(%rdi) -movdqu %xmm9,144(%rdi) -ret -.cfi_endproc - -#endif /* __x86_64__ */ -#endif /* !OPENSSL_NO_ASM */ diff --git a/src/crypto/curve25519/ed25519_test.cc b/src/crypto/curve25519/ed25519_test.cc index 31216f1b..4f34675b 100644 --- a/src/crypto/curve25519/ed25519_test.cc +++ b/src/crypto/curve25519/ed25519_test.cc @@ -44,6 +44,28 @@ TEST(Ed25519Test, TestVectors) { }); } +TEST(Ed25519Test, Malleability) { + // https://tools.ietf.org/html/rfc8032#section-5.1.7 adds an additional test + // that s be in [0, order). This prevents someone from adding a multiple of + // order to s and obtaining a second valid signature for the same message. + static const uint8_t kMsg[] = {0x54, 0x65, 0x73, 0x74}; + static const uint8_t kSig[] = { + 0x7c, 0x38, 0xe0, 0x26, 0xf2, 0x9e, 0x14, 0xaa, 0xbd, 0x05, 0x9a, + 0x0f, 0x2d, 0xb8, 0xb0, 0xcd, 0x78, 0x30, 0x40, 0x60, 0x9a, 0x8b, + 0xe6, 0x84, 0xdb, 0x12, 0xf8, 0x2a, 0x27, 0x77, 0x4a, 0xb0, 0x67, + 0x65, 0x4b, 0xce, 0x38, 0x32, 0xc2, 0xd7, 0x6f, 0x8f, 0x6f, 0x5d, + 0xaf, 0xc0, 0x8d, 0x93, 0x39, 0xd4, 0xee, 0xf6, 0x76, 0x57, 0x33, + 0x36, 0xa5, 0xc5, 0x1e, 0xb6, 0xf9, 0x46, 0xb3, 0x1d, + }; + static const uint8_t kPub[] = { + 0x7d, 0x4d, 0x0e, 0x7f, 0x61, 0x53, 0xa6, 0x9b, 0x62, 0x42, 0xb5, + 0x22, 0xab, 0xbe, 0xe6, 0x85, 0xfd, 0xa4, 0x42, 0x0f, 0x88, 0x34, + 0xb1, 0x08, 0xc3, 0xbd, 0xae, 0x36, 0x9e, 0xf5, 0x49, 0xfa, + }; + + EXPECT_FALSE(ED25519_verify(kMsg, sizeof(kMsg), kSig, kPub)); +} + TEST(Ed25519Test, KeypairFromSeed) { uint8_t public_key1[32], private_key1[64]; ED25519_keypair(public_key1, private_key1); diff --git a/src/crypto/curve25519/x25519-x86_64.c b/src/crypto/curve25519/x25519-x86_64.c deleted file mode 100644 index 41db0bdd..00000000 --- a/src/crypto/curve25519/x25519-x86_64.c +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright (c) 2015, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP -// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as -// public domain but this file has the ISC license just to keep licencing -// simple. -// -// The field functions are shared by Ed25519 and X25519 where possible. - -#include <openssl/curve25519.h> - -#include <string.h> - -#include "../internal.h" -#include "../../third_party/fiat/internal.h" - - -#if defined(BORINGSSL_X25519_X86_64) - -typedef struct { uint64_t v[5]; } fe25519; - -// These functions are defined in asm/x25519-x86_64.S -void x25519_x86_64_work_cswap(fe25519 *, uint64_t); -void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b); -void x25519_x86_64_square(fe25519 *out, const fe25519 *a); -void x25519_x86_64_freeze(fe25519 *); -void x25519_x86_64_ladderstep(fe25519 *work); - -static void fe25519_setint(fe25519 *r, unsigned v) { - r->v[0] = v; - r->v[1] = 0; - r->v[2] = 0; - r->v[3] = 0; - r->v[4] = 0; -} - -// Assumes input x being reduced below 2^255 -static void fe25519_pack(unsigned char r[32], const fe25519 *x) { - fe25519 t; - t = *x; - x25519_x86_64_freeze(&t); - - r[0] = (uint8_t)(t.v[0] & 0xff); - r[1] = (uint8_t)((t.v[0] >> 8) & 0xff); - r[2] = (uint8_t)((t.v[0] >> 16) & 0xff); - r[3] = (uint8_t)((t.v[0] >> 24) & 0xff); - r[4] = (uint8_t)((t.v[0] >> 32) & 0xff); - r[5] = (uint8_t)((t.v[0] >> 40) & 0xff); - r[6] = (uint8_t)((t.v[0] >> 48)); - - r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8); - r[7] = (uint8_t)((t.v[1] >> 5) & 0xff); - r[8] = (uint8_t)((t.v[1] >> 13) & 0xff); - r[9] = (uint8_t)((t.v[1] >> 21) & 0xff); - r[10] = (uint8_t)((t.v[1] >> 29) & 0xff); - r[11] = (uint8_t)((t.v[1] >> 37) & 0xff); - r[12] = (uint8_t)((t.v[1] >> 45)); - - r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0); - r[13] = (uint8_t)((t.v[2] >> 2) & 0xff); - r[14] = (uint8_t)((t.v[2] >> 10) & 0xff); - r[15] = (uint8_t)((t.v[2] >> 18) & 0xff); - r[16] = (uint8_t)((t.v[2] >> 26) & 0xff); - r[17] = (uint8_t)((t.v[2] >> 34) & 0xff); - r[18] = (uint8_t)((t.v[2] >> 42) & 0xff); - r[19] = (uint8_t)((t.v[2] >> 50)); - - r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe); - r[20] = (uint8_t)((t.v[3] >> 7) & 0xff); - r[21] = (uint8_t)((t.v[3] >> 15) & 0xff); - r[22] = (uint8_t)((t.v[3] >> 23) & 0xff); - r[23] = (uint8_t)((t.v[3] >> 31) & 0xff); - r[24] = (uint8_t)((t.v[3] >> 39) & 0xff); - r[25] = (uint8_t)((t.v[3] >> 47)); - - r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0); - r[26] = (uint8_t)((t.v[4] >> 4) & 0xff); - r[27] = (uint8_t)((t.v[4] >> 12) & 0xff); - r[28] = (uint8_t)((t.v[4] >> 20) & 0xff); - r[29] = (uint8_t)((t.v[4] >> 28) & 0xff); - r[30] = (uint8_t)((t.v[4] >> 36) & 0xff); - r[31] = (uint8_t)((t.v[4] >> 44)); -} - -static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) { - r->v[0] = x[0]; - r->v[0] += (uint64_t)x[1] << 8; - r->v[0] += (uint64_t)x[2] << 16; - r->v[0] += (uint64_t)x[3] << 24; - r->v[0] += (uint64_t)x[4] << 32; - r->v[0] += (uint64_t)x[5] << 40; - r->v[0] += ((uint64_t)x[6] & 7) << 48; - - r->v[1] = x[6] >> 3; - r->v[1] += (uint64_t)x[7] << 5; - r->v[1] += (uint64_t)x[8] << 13; - r->v[1] += (uint64_t)x[9] << 21; - r->v[1] += (uint64_t)x[10] << 29; - r->v[1] += (uint64_t)x[11] << 37; - r->v[1] += ((uint64_t)x[12] & 63) << 45; - - r->v[2] = x[12] >> 6; - r->v[2] += (uint64_t)x[13] << 2; - r->v[2] += (uint64_t)x[14] << 10; - r->v[2] += (uint64_t)x[15] << 18; - r->v[2] += (uint64_t)x[16] << 26; - r->v[2] += (uint64_t)x[17] << 34; - r->v[2] += (uint64_t)x[18] << 42; - r->v[2] += ((uint64_t)x[19] & 1) << 50; - - r->v[3] = x[19] >> 1; - r->v[3] += (uint64_t)x[20] << 7; - r->v[3] += (uint64_t)x[21] << 15; - r->v[3] += (uint64_t)x[22] << 23; - r->v[3] += (uint64_t)x[23] << 31; - r->v[3] += (uint64_t)x[24] << 39; - r->v[3] += ((uint64_t)x[25] & 15) << 47; - - r->v[4] = x[25] >> 4; - r->v[4] += (uint64_t)x[26] << 4; - r->v[4] += (uint64_t)x[27] << 12; - r->v[4] += (uint64_t)x[28] << 20; - r->v[4] += (uint64_t)x[29] << 28; - r->v[4] += (uint64_t)x[30] << 36; - r->v[4] += ((uint64_t)x[31] & 127) << 44; -} - -static void fe25519_invert(fe25519 *r, const fe25519 *x) { - fe25519 z2; - fe25519 z9; - fe25519 z11; - fe25519 z2_5_0; - fe25519 z2_10_0; - fe25519 z2_20_0; - fe25519 z2_50_0; - fe25519 z2_100_0; - fe25519 t; - int i; - - /* 2 */ x25519_x86_64_square(&z2, x); - /* 4 */ x25519_x86_64_square(&t, &z2); - /* 8 */ x25519_x86_64_square(&t, &t); - /* 9 */ x25519_x86_64_mul(&z9, &t, x); - /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2); - /* 22 */ x25519_x86_64_square(&t, &z11); - /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9); - - /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0); - /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); } - /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0); - - /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0); - /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0); - - /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0); - /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); } - /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0); - - /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0); - - /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0); - /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0); - - /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0); - /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { - x25519_x86_64_square(&t, &t); - } - /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0); - - /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0); - - /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t); - /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t); - - /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t); - - /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t); - /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11); -} - -static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) { - fe25519 work[5]; - - work[0] = *xr; - fe25519_setint(work + 1, 1); - fe25519_setint(work + 2, 0); - work[3] = *xr; - fe25519_setint(work + 4, 1); - - int i, j; - uint8_t prevbit = 0; - - j = 6; - for (i = 31; i >= 0; i--) { - while (j >= 0) { - const uint8_t bit = 1 & (s[i] >> j); - const uint64_t swap = bit ^ prevbit; - prevbit = bit; - x25519_x86_64_work_cswap(work + 1, swap); - x25519_x86_64_ladderstep(work); - j -= 1; - } - j = 7; - } - - *xr = work[1]; - *zr = work[2]; -} - -void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32], - const uint8_t point[32]) { - uint8_t e[32]; - OPENSSL_memcpy(e, scalar, sizeof(e)); - - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - fe25519 t; - fe25519 z; - fe25519_unpack(&t, point); - mladder(&t, &z, e); - fe25519_invert(&z, &z); - x25519_x86_64_mul(&t, &t, &z); - fe25519_pack(out, &t); -} - -#endif // BORINGSSL_X25519_X86_64 diff --git a/src/crypto/dsa/dsa.c b/src/crypto/dsa/dsa.c index f3d4f859..532ffec9 100644 --- a/src/crypto/dsa/dsa.c +++ b/src/crypto/dsa/dsa.c @@ -239,11 +239,6 @@ int DSA_generate_parameters_ex(DSA *dsa, unsigned bits, const uint8_t *seed_in, } BN_CTX_start(ctx); - mont = BN_MONT_CTX_new(); - if (mont == NULL) { - goto err; - } - r0 = BN_CTX_get(ctx); g = BN_CTX_get(ctx); W = BN_CTX_get(ctx); @@ -401,8 +396,9 @@ end: goto err; } - if (!BN_set_word(test, h) || - !BN_MONT_CTX_set(mont, p, ctx)) { + mont = BN_MONT_CTX_new_for_modulus(p, ctx); + if (mont == NULL || + !BN_set_word(test, h)) { goto err; } diff --git a/src/crypto/err/ssl.errordata b/src/crypto/err/ssl.errordata index 44509584..7b63bc8e 100644 --- a/src/crypto/err/ssl.errordata +++ b/src/crypto/err/ssl.errordata @@ -117,6 +117,7 @@ SSL,191,PATH_TOO_LONG SSL,192,PEER_DID_NOT_RETURN_A_CERTIFICATE SSL,193,PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE SSL,267,PRE_SHARED_KEY_MUST_BE_LAST +SSL,287,PRIVATE_KEY_OPERATION_FAILED SSL,194,PROTOCOL_IS_SHUTDOWN SSL,271,PSK_IDENTITY_BINDER_COUNT_MISMATCH SSL,195,PSK_IDENTITY_NOT_FOUND diff --git a/src/crypto/fipsmodule/bn/add.c b/src/crypto/fipsmodule/bn/add.c index 201c526d..645e647d 100644 --- a/src/crypto/fipsmodule/bn/add.c +++ b/src/crypto/fipsmodule/bn/add.c @@ -199,7 +199,6 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) { } int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - int max; int add = 0, neg = 0; const BIGNUM *tmp; @@ -232,13 +231,6 @@ int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { return 1; } - // We are actually doing a - b :-) - - max = (a->top > b->top) ? a->top : b->top; - if (!bn_wexpand(r, max)) { - return 0; - } - if (BN_ucmp(a, b) < 0) { if (!BN_usub(r, b, a)) { return 0; diff --git a/src/crypto/fipsmodule/bn/bn.c b/src/crypto/fipsmodule/bn/bn.c index 4be4f21c..520ca27d 100644 --- a/src/crypto/fipsmodule/bn/bn.c +++ b/src/crypto/fipsmodule/bn/bn.c @@ -227,13 +227,12 @@ unsigned BN_num_bits_word(BN_ULONG l) { } unsigned BN_num_bits(const BIGNUM *bn) { - const int max = bn->top - 1; - - if (BN_is_zero(bn)) { + const int width = bn_minimal_width(bn); + if (width == 0) { return 0; } - return max*BN_BITS2 + BN_num_bits_word(bn->d[max]); + return (width - 1) * BN_BITS2 + BN_num_bits_word(bn->d[width - 1]); } unsigned BN_num_bytes(const BIGNUM *bn) { @@ -298,6 +297,35 @@ int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num) { return 1; } +int bn_fits_in_words(const BIGNUM *bn, size_t num) { + // All words beyond |num| must be zero. + BN_ULONG mask = 0; + for (size_t i = num; i < (size_t)bn->top; i++) { + mask |= bn->d[i]; + } + return mask == 0; +} + +int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn) { + if (bn->neg) { + OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); + return 0; + } + + size_t width = (size_t)bn->top; + if (width > num) { + if (!bn_fits_in_words(bn, num)) { + OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG); + return 0; + } + width = num; + } + + OPENSSL_memset(out, 0, sizeof(BN_ULONG) * num); + OPENSSL_memcpy(out, bn->d, sizeof(BN_ULONG) * width); + return 1; +} + int BN_is_negative(const BIGNUM *bn) { return bn->neg != 0; } @@ -350,19 +378,35 @@ int bn_expand(BIGNUM *bn, size_t bits) { return bn_wexpand(bn, (bits+BN_BITS2-1)/BN_BITS2); } -void bn_correct_top(BIGNUM *bn) { - BN_ULONG *ftl; - int tmp_top = bn->top; - - if (tmp_top > 0) { - for (ftl = &(bn->d[tmp_top - 1]); tmp_top > 0; tmp_top--) { - if (*(ftl--)) { - break; - } +int bn_resize_words(BIGNUM *bn, size_t words) { + if ((size_t)bn->top <= words) { + if (!bn_wexpand(bn, words)) { + return 0; } - bn->top = tmp_top; + OPENSSL_memset(bn->d + bn->top, 0, (words - bn->top) * sizeof(BN_ULONG)); + bn->top = words; + return 1; + } + + // All words beyond the new width must be zero. + if (!bn_fits_in_words(bn, words)) { + OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG); + return 0; } + bn->top = words; + return 1; +} + +int bn_minimal_width(const BIGNUM *bn) { + int ret = bn->top; + while (ret > 0 && bn->d[ret - 1] == 0) { + ret--; + } + return ret; +} +void bn_correct_top(BIGNUM *bn) { + bn->top = bn_minimal_width(bn); if (bn->top == 0) { bn->neg = 0; } diff --git a/src/crypto/fipsmodule/bn/bn_test.cc b/src/crypto/fipsmodule/bn/bn_test.cc index ca5f978d..f36656f6 100644 --- a/src/crypto/fipsmodule/bn/bn_test.cc +++ b/src/crypto/fipsmodule/bn/bn_test.cc @@ -387,15 +387,15 @@ static void TestSquare(FileTest *t, BN_CTX *ctx) { } #if !defined(BORINGSSL_SHARED_LIBRARY) - if (static_cast<size_t>(a->top) <= BN_SMALL_MAX_WORDS) { - for (size_t num_a = a->top; num_a <= BN_SMALL_MAX_WORDS; num_a++) { + int a_width = bn_minimal_width(a.get()); + if (a_width <= BN_SMALL_MAX_WORDS) { + for (size_t num_a = a_width; num_a <= BN_SMALL_MAX_WORDS; num_a++) { SCOPED_TRACE(num_a); size_t num_r = 2 * num_a; // Use newly-allocated buffers so ASan will catch out-of-bounds writes. std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[num_a]), r_words(new BN_ULONG[num_r]); - OPENSSL_memset(a_words.get(), 0, num_a * sizeof(BN_ULONG)); - OPENSSL_memcpy(a_words.get(), a->d, a->top * sizeof(BN_ULONG)); + ASSERT_TRUE(bn_copy_words(a_words.get(), num_a, a.get())); ASSERT_TRUE(bn_mul_small(r_words.get(), num_r, a_words.get(), num_a, a_words.get(), num_a)); @@ -445,22 +445,25 @@ static void TestProduct(FileTest *t, BN_CTX *ctx) { } #if !defined(BORINGSSL_SHARED_LIBRARY) - if (!BN_is_negative(product.get()) && - static_cast<size_t>(a->top) <= BN_SMALL_MAX_WORDS && - static_cast<size_t>(b->top) <= BN_SMALL_MAX_WORDS) { - for (size_t num_a = a->top; num_a <= BN_SMALL_MAX_WORDS; num_a++) { + BN_set_negative(a.get(), 0); + BN_set_negative(b.get(), 0); + BN_set_negative(product.get(), 0); + + int a_width = bn_minimal_width(a.get()); + int b_width = bn_minimal_width(b.get()); + if (a_width <= BN_SMALL_MAX_WORDS && b_width <= BN_SMALL_MAX_WORDS) { + for (size_t num_a = static_cast<size_t>(a_width); + num_a <= BN_SMALL_MAX_WORDS; num_a++) { SCOPED_TRACE(num_a); - for (size_t num_b = b->top; num_b <= BN_SMALL_MAX_WORDS; num_b++) { + for (size_t num_b = static_cast<size_t>(b_width); + num_b <= BN_SMALL_MAX_WORDS; num_b++) { SCOPED_TRACE(num_b); size_t num_r = num_a + num_b; // Use newly-allocated buffers so ASan will catch out-of-bounds writes. std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[num_a]), b_words(new BN_ULONG[num_b]), r_words(new BN_ULONG[num_r]); - OPENSSL_memset(a_words.get(), 0, num_a * sizeof(BN_ULONG)); - OPENSSL_memcpy(a_words.get(), a->d, a->top * sizeof(BN_ULONG)); - - OPENSSL_memset(b_words.get(), 0, num_b * sizeof(BN_ULONG)); - OPENSSL_memcpy(b_words.get(), b->d, b->top * sizeof(BN_ULONG)); + ASSERT_TRUE(bn_copy_words(a_words.get(), num_a, a.get())); + ASSERT_TRUE(bn_copy_words(b_words.get(), num_b, b.get())); ASSERT_TRUE(bn_mul_small(r_words.get(), num_r, a_words.get(), num_a, b_words.get(), num_b)); @@ -537,12 +540,12 @@ static void TestModMul(FileTest *t, BN_CTX *ctx) { if (BN_is_odd(m.get())) { // Reduce |a| and |b| and test the Montgomery version. - bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new()); + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx)); bssl::UniquePtr<BIGNUM> a_tmp(BN_new()), b_tmp(BN_new()); ASSERT_TRUE(mont); ASSERT_TRUE(a_tmp); ASSERT_TRUE(b_tmp); - ASSERT_TRUE(BN_MONT_CTX_set(mont.get(), m.get(), ctx)); ASSERT_TRUE(BN_nnmod(a.get(), a.get(), m.get(), ctx)); ASSERT_TRUE(BN_nnmod(b.get(), b.get(), m.get(), ctx)); ASSERT_TRUE(BN_to_montgomery(a_tmp.get(), a.get(), mont.get(), ctx)); @@ -554,24 +557,23 @@ static void TestModMul(FileTest *t, BN_CTX *ctx) { ret.get()); #if !defined(BORINGSSL_SHARED_LIBRARY) - if (m->top <= BN_SMALL_MAX_WORDS) { - std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[m->top]), - b_words(new BN_ULONG[m->top]), r_words(new BN_ULONG[m->top]); - OPENSSL_memset(a_words.get(), 0, m->top * sizeof(BN_ULONG)); - OPENSSL_memcpy(a_words.get(), a->d, a->top * sizeof(BN_ULONG)); - OPENSSL_memset(b_words.get(), 0, m->top * sizeof(BN_ULONG)); - OPENSSL_memcpy(b_words.get(), b->d, b->top * sizeof(BN_ULONG)); - ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m->top, a_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_to_montgomery_small(b_words.get(), m->top, b_words.get(), - m->top, mont.get())); + size_t m_width = static_cast<size_t>(bn_minimal_width(m.get())); + if (m_width <= BN_SMALL_MAX_WORDS) { + std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[m_width]), + b_words(new BN_ULONG[m_width]), r_words(new BN_ULONG[m_width]); + ASSERT_TRUE(bn_copy_words(a_words.get(), m_width, a.get())); + ASSERT_TRUE(bn_copy_words(b_words.get(), m_width, b.get())); + ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m_width, a_words.get(), + m_width, mont.get())); + ASSERT_TRUE(bn_to_montgomery_small(b_words.get(), m_width, b_words.get(), + m_width, mont.get())); ASSERT_TRUE(bn_mod_mul_montgomery_small( - r_words.get(), m->top, a_words.get(), m->top, b_words.get(), m->top, + r_words.get(), m_width, a_words.get(), m_width, b_words.get(), m_width, mont.get())); // Use the second half of |tmp| so ASan will catch out-of-bounds writes. - ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m->top, r_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m->top)); + ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m_width, r_words.get(), + m_width, mont.get())); + ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m_width)); EXPECT_BIGNUMS_EQUAL("A * B (mod M) (Montgomery, words)", mod_mul.get(), ret.get()); } @@ -601,11 +603,11 @@ static void TestModSquare(FileTest *t, BN_CTX *ctx) { if (BN_is_odd(m.get())) { // Reduce |a| and test the Montgomery version. - bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new()); + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx)); bssl::UniquePtr<BIGNUM> a_tmp(BN_new()); ASSERT_TRUE(mont); ASSERT_TRUE(a_tmp); - ASSERT_TRUE(BN_MONT_CTX_set(mont.get(), m.get(), ctx)); ASSERT_TRUE(BN_nnmod(a.get(), a.get(), m.get(), ctx)); ASSERT_TRUE(BN_to_montgomery(a_tmp.get(), a.get(), mont.get(), ctx)); ASSERT_TRUE(BN_mod_mul_montgomery(ret.get(), a_tmp.get(), a_tmp.get(), @@ -623,32 +625,32 @@ static void TestModSquare(FileTest *t, BN_CTX *ctx) { ret.get()); #if !defined(BORINGSSL_SHARED_LIBRARY) - if (m->top <= BN_SMALL_MAX_WORDS) { - std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[m->top]), - a_copy_words(new BN_ULONG[m->top]), r_words(new BN_ULONG[m->top]); - OPENSSL_memset(a_words.get(), 0, m->top * sizeof(BN_ULONG)); - OPENSSL_memcpy(a_words.get(), a->d, a->top * sizeof(BN_ULONG)); - ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m->top, a_words.get(), - m->top, mont.get())); + size_t m_width = static_cast<size_t>(bn_minimal_width(m.get())); + if (m_width <= BN_SMALL_MAX_WORDS) { + std::unique_ptr<BN_ULONG[]> a_words(new BN_ULONG[m_width]), + a_copy_words(new BN_ULONG[m_width]), r_words(new BN_ULONG[m_width]); + ASSERT_TRUE(bn_copy_words(a_words.get(), m_width, a.get())); + ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m_width, a_words.get(), + m_width, mont.get())); ASSERT_TRUE(bn_mod_mul_montgomery_small( - r_words.get(), m->top, a_words.get(), m->top, a_words.get(), m->top, - mont.get())); - ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m->top, r_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m->top)); + r_words.get(), m_width, a_words.get(), m_width, a_words.get(), + m_width, mont.get())); + ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m_width, + r_words.get(), m_width, mont.get())); + ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m_width)); EXPECT_BIGNUMS_EQUAL("A * A (mod M) (Montgomery, words)", mod_square.get(), ret.get()); // Repeat the operation with |a_copy_words|. OPENSSL_memcpy(a_copy_words.get(), a_words.get(), - m->top * sizeof(BN_ULONG)); + m_width * sizeof(BN_ULONG)); ASSERT_TRUE(bn_mod_mul_montgomery_small( - r_words.get(), m->top, a_words.get(), m->top, a_copy_words.get(), - m->top, mont.get())); + r_words.get(), m_width, a_words.get(), m_width, a_copy_words.get(), + m_width, mont.get())); // Use the second half of |tmp| so ASan will catch out-of-bounds writes. - ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m->top, r_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m->top)); + ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m_width, + r_words.get(), m_width, mont.get())); + ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m_width)); EXPECT_BIGNUMS_EQUAL("A * A_copy (mod M) (Montgomery, words)", mod_square.get(), ret.get()); } @@ -683,22 +685,22 @@ static void TestModExp(FileTest *t, BN_CTX *ctx) { ret.get()); #if !defined(BORINGSSL_SHARED_LIBRARY) - if (m->top <= BN_SMALL_MAX_WORDS) { - bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new()); + size_t m_width = static_cast<size_t>(bn_minimal_width(m.get())); + if (m_width <= BN_SMALL_MAX_WORDS) { + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx)); ASSERT_TRUE(mont.get()); - ASSERT_TRUE(BN_MONT_CTX_set(mont.get(), m.get(), ctx)); ASSERT_TRUE(BN_nnmod(a.get(), a.get(), m.get(), ctx)); - std::unique_ptr<BN_ULONG[]> r_words(new BN_ULONG[m->top]), - a_words(new BN_ULONG[m->top]); - OPENSSL_memset(a_words.get(), 0, m->top * sizeof(BN_ULONG)); - OPENSSL_memcpy(a_words.get(), a->d, a->top * sizeof(BN_ULONG)); - ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m->top, a_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_mod_exp_mont_small(r_words.get(), m->top, a_words.get(), - m->top, e->d, e->top, mont.get())); - ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m->top, r_words.get(), - m->top, mont.get())); - ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m->top)); + std::unique_ptr<BN_ULONG[]> r_words(new BN_ULONG[m_width]), + a_words(new BN_ULONG[m_width]); + ASSERT_TRUE(bn_copy_words(a_words.get(), m_width, a.get())); + ASSERT_TRUE(bn_to_montgomery_small(a_words.get(), m_width, a_words.get(), + m_width, mont.get())); + ASSERT_TRUE(bn_mod_exp_mont_small(r_words.get(), m_width, a_words.get(), + m_width, e->d, e->top, mont.get())); + ASSERT_TRUE(bn_from_montgomery_small(r_words.get(), m_width, + r_words.get(), m_width, mont.get())); + ASSERT_TRUE(bn_set_words(ret.get(), r_words.get(), m_width)); EXPECT_BIGNUMS_EQUAL("A ^ E (mod M) (Montgomery, words)", mod_exp.get(), ret.get()); } @@ -862,6 +864,17 @@ TEST_F(BNTest, BN2BinPadded) { EXPECT_EQ(Bytes(zeros, sizeof(out) - bytes), Bytes(out, sizeof(out) - bytes)); EXPECT_EQ(Bytes(reference, bytes), Bytes(out + sizeof(out) - bytes, bytes)); + +#if !defined(BORINGSSL_SHARED_LIBRARY) + // Repeat some tests with a non-minimal |BIGNUM|. + EXPECT_TRUE(bn_resize_words(n.get(), 32)); + + EXPECT_FALSE(BN_bn2bin_padded(out, bytes - 1, n.get())); + + ASSERT_TRUE(BN_bn2bin_padded(out, bytes + 1, n.get())); + EXPECT_EQ(0u, out[0]); + EXPECT_EQ(Bytes(reference, bytes), Bytes(out + 1, bytes)); +#endif } } @@ -1267,11 +1280,9 @@ TEST_F(BNTest, BadModulus) { bssl::UniquePtr<BIGNUM> a(BN_new()); bssl::UniquePtr<BIGNUM> b(BN_new()); bssl::UniquePtr<BIGNUM> zero(BN_new()); - bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new()); ASSERT_TRUE(a); ASSERT_TRUE(b); ASSERT_TRUE(zero); - ASSERT_TRUE(mont); BN_zero(zero.get()); @@ -1294,13 +1305,16 @@ TEST_F(BNTest, BadModulus) { a.get(), BN_value_one(), BN_value_one(), zero.get(), ctx(), nullptr)); ERR_clear_error(); - EXPECT_FALSE(BN_MONT_CTX_set(mont.get(), zero.get(), ctx())); + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(zero.get(), ctx())); + EXPECT_FALSE(mont); ERR_clear_error(); // Some operations also may not be used with an even modulus. ASSERT_TRUE(BN_set_word(b.get(), 16)); - EXPECT_FALSE(BN_MONT_CTX_set(mont.get(), b.get(), ctx())); + mont.reset(BN_MONT_CTX_new_for_modulus(b.get(), ctx())); + EXPECT_FALSE(mont); ERR_clear_error(); EXPECT_FALSE(BN_mod_exp_mont(a.get(), BN_value_one(), BN_value_one(), b.get(), @@ -1883,4 +1897,100 @@ TEST_F(BNTest, LessThanWords) { EXPECT_EQ(0, bn_less_than_words(NULL, NULL, 0)); EXPECT_EQ(0, bn_in_range_words(NULL, 0, NULL, 0)); } + +TEST_F(BNTest, NonMinimal) { + bssl::UniquePtr<BIGNUM> ten(BN_new()); + ASSERT_TRUE(ten); + ASSERT_TRUE(BN_set_word(ten.get(), 10)); + + bssl::UniquePtr<BIGNUM> ten_copy(BN_dup(ten.get())); + ASSERT_TRUE(ten_copy); + + bssl::UniquePtr<BIGNUM> eight(BN_new()); + ASSERT_TRUE(eight); + ASSERT_TRUE(BN_set_word(eight.get(), 8)); + + bssl::UniquePtr<BIGNUM> forty_two(BN_new()); + ASSERT_TRUE(forty_two); + ASSERT_TRUE(BN_set_word(forty_two.get(), 42)); + + bssl::UniquePtr<BIGNUM> two_exp_256(BN_new()); + ASSERT_TRUE(two_exp_256); + ASSERT_TRUE(BN_lshift(two_exp_256.get(), BN_value_one(), 256)); + + // Check some comparison functions on |ten| before and after expanding. + for (size_t width = 1; width < 10; width++) { + SCOPED_TRACE(width); + // Make a wider version of |ten|. + EXPECT_TRUE(bn_resize_words(ten.get(), width)); + EXPECT_EQ(static_cast<int>(width), ten->top); + + EXPECT_TRUE(BN_abs_is_word(ten.get(), 10)); + EXPECT_TRUE(BN_is_word(ten.get(), 10)); + EXPECT_EQ(10u, BN_get_word(ten.get())); + uint64_t v; + ASSERT_TRUE(BN_get_u64(ten.get(), &v)); + EXPECT_EQ(10u, v); + + EXPECT_TRUE(BN_equal_consttime(ten.get(), ten_copy.get())); + EXPECT_TRUE(BN_equal_consttime(ten_copy.get(), ten.get())); + EXPECT_FALSE(BN_less_than_consttime(ten.get(), ten_copy.get())); + EXPECT_FALSE(BN_less_than_consttime(ten_copy.get(), ten.get())); + EXPECT_EQ(BN_cmp(ten.get(), ten_copy.get()), 0); + + EXPECT_FALSE(BN_equal_consttime(ten.get(), eight.get())); + EXPECT_FALSE(BN_less_than_consttime(ten.get(), eight.get())); + EXPECT_TRUE(BN_less_than_consttime(eight.get(), ten.get())); + EXPECT_LT(BN_cmp(eight.get(), ten.get()), 0); + + EXPECT_FALSE(BN_equal_consttime(ten.get(), forty_two.get())); + EXPECT_TRUE(BN_less_than_consttime(ten.get(), forty_two.get())); + EXPECT_FALSE(BN_less_than_consttime(forty_two.get(), ten.get())); + EXPECT_GT(BN_cmp(forty_two.get(), ten.get()), 0); + + EXPECT_FALSE(BN_equal_consttime(ten.get(), two_exp_256.get())); + EXPECT_TRUE(BN_less_than_consttime(ten.get(), two_exp_256.get())); + EXPECT_FALSE(BN_less_than_consttime(two_exp_256.get(), ten.get())); + EXPECT_GT(BN_cmp(two_exp_256.get(), ten.get()), 0); + + EXPECT_EQ(4u, BN_num_bits(ten.get())); + EXPECT_EQ(1u, BN_num_bytes(ten.get())); + EXPECT_FALSE(BN_is_pow2(ten.get())); + } + + // |ten| may be resized back down to one word. + EXPECT_TRUE(bn_resize_words(ten.get(), 1)); + EXPECT_EQ(1, ten->top); + + // But not to zero words, which it does not fit. + EXPECT_FALSE(bn_resize_words(ten.get(), 0)); + + EXPECT_TRUE(BN_is_pow2(eight.get())); + EXPECT_TRUE(bn_resize_words(eight.get(), 4)); + EXPECT_EQ(4, eight->top); + EXPECT_TRUE(BN_is_pow2(eight.get())); + + // |BN_MONT_CTX| is always stored minimally and uses the same R independent of + // input width. + static const uint8_t kP[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + bssl::UniquePtr<BIGNUM> p(BN_bin2bn(kP, sizeof(kP), nullptr)); + ASSERT_TRUE(p); + + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(p.get(), ctx())); + ASSERT_TRUE(mont); + + ASSERT_TRUE(bn_resize_words(p.get(), 32)); + bssl::UniquePtr<BN_MONT_CTX> mont2( + BN_MONT_CTX_new_for_modulus(p.get(), ctx())); + ASSERT_TRUE(mont2); + + EXPECT_EQ(mont->N.top, mont2->N.top); + EXPECT_EQ(0, BN_cmp(&mont->RR, &mont2->RR)); +} + #endif // !BORINGSSL_SHARED_LIBRARY diff --git a/src/crypto/fipsmodule/bn/bytes.c b/src/crypto/fipsmodule/bn/bytes.c index 328d56e7..aa654835 100644 --- a/src/crypto/fipsmodule/bn/bytes.c +++ b/src/crypto/fipsmodule/bn/bytes.c @@ -159,22 +159,9 @@ size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) { return n; } -int BN_bn2le_padded(uint8_t *out, size_t len, const BIGNUM *in) { - // If we don't have enough space, fail out. - size_t num_bytes = BN_num_bytes(in); - if (len < num_bytes) { - return 0; - } - - // We only support little-endian platforms, so we can simply memcpy into the - // internal representation. - OPENSSL_memcpy(out, in->d, num_bytes); - - // Pad out the rest of the buffer with zeroes. - OPENSSL_memset(out + num_bytes, 0, len - num_bytes); - - return 1; -} +// TODO(davidben): This does not need to be quite so complex once the |BIGNUM|s +// we care about are fixed-width. |read_word_padded| is a hack to paper over +// parts of the |bn_correct_top| leak. Fix that, and this can be simpler. // constant_time_select_ulong returns |x| if |v| is 1 and |y| if |v| is 0. Its // behavior is undefined if |v| takes any other value. @@ -197,6 +184,10 @@ static int constant_time_le_size_t(size_t x, size_t y) { // the access would be out of bounds, it reads the last word of |in|. |in| must // not be zero. static BN_ULONG read_word_padded(const BIGNUM *in, size_t i) { + if (in->dmax == 0) { + return 0; + } + // Read |in->d[i]| if valid. Otherwise, read the last word. BN_ULONG l = in->d[constant_time_select_ulong( constant_time_le_size_t(in->dmax, i), in->dmax - 1, i)]; @@ -205,24 +196,45 @@ static BN_ULONG read_word_padded(const BIGNUM *in, size_t i) { return constant_time_select_ulong(constant_time_le_size_t(in->top, i), 0, l); } -int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) { - // Special case for |in| = 0. Just branch as the probability is negligible. - if (BN_is_zero(in)) { - OPENSSL_memset(out, 0, len); - return 1; +static int fits_in_bytes(const BIGNUM *in, size_t len) { + BN_ULONG mask = 0; + for (size_t i = (len + (BN_BYTES - 1)) / BN_BYTES; i < (size_t)in->top; i++) { + mask |= in->d[i]; } + if ((len % BN_BYTES) != 0) { + BN_ULONG l = read_word_padded(in, len / BN_BYTES); + mask |= l >> (8 * (len % BN_BYTES)); + } + return mask == 0; +} +int BN_bn2le_padded(uint8_t *out, size_t len, const BIGNUM *in) { + // If we don't have enough space, fail out. + if (!fits_in_bytes(in, len)) { + return 0; + } + + size_t todo = in->top * BN_BYTES; + if (todo > len) { + todo = len; + } + + // We only support little-endian platforms, so we can simply memcpy into the + // internal representation. + OPENSSL_memcpy(out, in->d, todo); + + // Pad out the rest of the buffer with zeroes. + OPENSSL_memset(out + todo, 0, len - todo); + + return 1; +} + +int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) { // Check if the integer is too big. This case can exit early in non-constant // time. - if ((size_t)in->top > (len + (BN_BYTES - 1)) / BN_BYTES) { + if (!fits_in_bytes(in, len)) { return 0; } - if ((len % BN_BYTES) != 0) { - BN_ULONG l = read_word_padded(in, len / BN_BYTES); - if (l >> (8 * (len % BN_BYTES)) != 0) { - return 0; - } - } // Write the bytes out one by one. Serialization is done without branching on // the bits of |in| or on |in->top|, but if the routine would otherwise read @@ -240,7 +252,7 @@ int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) { } BN_ULONG BN_get_word(const BIGNUM *bn) { - switch (bn->top) { + switch (bn_minimal_width(bn)) { case 0: return 0; case 1: @@ -251,7 +263,7 @@ BN_ULONG BN_get_word(const BIGNUM *bn) { } int BN_get_u64(const BIGNUM *bn, uint64_t *out) { - switch (bn->top) { + switch (bn_minimal_width(bn)) { case 0: *out = 0; return 1; diff --git a/src/crypto/fipsmodule/bn/cmp.c b/src/crypto/fipsmodule/bn/cmp.c index acc017ff..265c8526 100644 --- a/src/crypto/fipsmodule/bn/cmp.c +++ b/src/crypto/fipsmodule/bn/cmp.c @@ -64,19 +64,18 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) { - int i; - BN_ULONG t1, t2, *ap, *bp; - - i = a->top - b->top; + int a_width = bn_minimal_width(a); + int b_width = bn_minimal_width(b); + int i = a_width - b_width; if (i != 0) { return i; } - ap = a->d; - bp = b->d; - for (i = a->top - 1; i >= 0; i--) { - t1 = ap[i]; - t2 = bp[i]; + const BN_ULONG *ap = a->d; + const BN_ULONG *bp = b->d; + for (i = a_width - 1; i >= 0; i--) { + BN_ULONG t1 = ap[i]; + BN_ULONG t2 = bp[i]; if (t1 != t2) { return (t1 > t2) ? 1 : -1; } @@ -114,14 +113,16 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) { lt = 1; } - if (a->top > b->top) { + int a_width = bn_minimal_width(a); + int b_width = bn_minimal_width(b); + if (a_width > b_width) { return gt; } - if (a->top < b->top) { + if (a_width < b_width) { return lt; } - for (i = a->top - 1; i >= 0; i--) { + for (i = a_width - 1; i >= 0; i--) { t1 = a->d[i]; t2 = b->d[i]; if (t1 > t2) { @@ -176,21 +177,43 @@ int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl) { return bn_cmp_words(a, b, cl); } -int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len) { +static int bn_less_than_words_impl(const BN_ULONG *a, size_t a_len, + const BN_ULONG *b, size_t b_len) { OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t), crypto_word_t_too_small); int ret = 0; - // Process the words in little-endian order. - for (size_t i = 0; i < len; i++) { + // Process the common words in little-endian order. + size_t min = a_len < b_len ? a_len : b_len; + for (size_t i = 0; i < min; i++) { crypto_word_t eq = constant_time_eq_w(a[i], b[i]); crypto_word_t lt = constant_time_lt_w(a[i], b[i]); ret = constant_time_select_int(eq, ret, constant_time_select_int(lt, 1, 0)); } + + // If |a| or |b| has non-zero words beyond |min|, they take precedence. + if (a_len < b_len) { + crypto_word_t mask = 0; + for (size_t i = a_len; i < b_len; i++) { + mask |= b[i]; + } + ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, 1); + } else if (b_len < a_len) { + crypto_word_t mask = 0; + for (size_t i = b_len; i < a_len; i++) { + mask |= a[i]; + } + ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, 0); + } + return ret; } +int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len) { + return bn_less_than_words_impl(a, len, b, len); +} + int BN_abs_is_word(const BIGNUM *bn, BN_ULONG w) { - switch (bn->top) { + switch (bn_minimal_width(bn)) { case 1: return bn->d[0] == w; case 0: @@ -212,7 +235,7 @@ int BN_cmp_word(const BIGNUM *a, BN_ULONG b) { } int BN_is_zero(const BIGNUM *bn) { - return bn->top == 0; + return bn_minimal_width(bn) == 0; } int BN_is_one(const BIGNUM *bn) { @@ -228,27 +251,52 @@ int BN_is_odd(const BIGNUM *bn) { } int BN_is_pow2(const BIGNUM *bn) { - if (bn->top == 0 || bn->neg) { + int width = bn_minimal_width(bn); + if (width == 0 || bn->neg) { return 0; } - for (int i = 0; i < bn->top - 1; i++) { + for (int i = 0; i < width - 1; i++) { if (bn->d[i] != 0) { return 0; } } - return 0 == (bn->d[bn->top-1] & (bn->d[bn->top-1] - 1)); + return 0 == (bn->d[width-1] & (bn->d[width-1] - 1)); } int BN_equal_consttime(const BIGNUM *a, const BIGNUM *b) { - if (a->top != b->top) { - return 0; + BN_ULONG mask = 0; + // If |a| or |b| has more words than the other, all those words must be zero. + for (int i = a->top; i < b->top; i++) { + mask |= b->d[i]; } + for (int i = b->top; i < a->top; i++) { + mask |= a->d[i]; + } + // Common words must match. + int min = a->top < b->top ? a->top : b->top; + for (int i = 0; i < min; i++) { + mask |= (a->d[i] ^ b->d[i]); + } + // The sign bit must match. + mask |= (a->neg ^ b->neg); + return mask == 0; +} - int limbs_are_equal = - CRYPTO_memcmp(a->d, b->d, (size_t)a->top * sizeof(a->d[0])) == 0; - - return constant_time_select_int(constant_time_eq_int(a->neg, b->neg), - limbs_are_equal, 0); +int BN_less_than_consttime(const BIGNUM *a, const BIGNUM *b) { + // We do not attempt to process the sign bit in constant time. Negative + // |BIGNUM|s should never occur in crypto, only calculators. + if (a->neg && !b->neg) { + return 1; + } + if (b->neg && !a->neg) { + return 0; + } + if (a->neg && b->neg) { + const BIGNUM *tmp = a; + a = b; + b = tmp; + } + return bn_less_than_words_impl(a->d, a->top, b->d, b->top); } diff --git a/src/crypto/fipsmodule/bn/exponentiation.c b/src/crypto/fipsmodule/bn/exponentiation.c index 63c1c050..9e0ddfbb 100644 --- a/src/crypto/fipsmodule/bn/exponentiation.c +++ b/src/crypto/fipsmodule/bn/exponentiation.c @@ -622,8 +622,8 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // Allocate a montgomery context if it was not supplied by the caller. if (mont == NULL) { - new_mont = BN_MONT_CTX_new(); - if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { + new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); + if (new_mont == NULL) { goto err; } mont = new_mont; @@ -666,22 +666,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } - // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is - // close to R and we subtract rather than perform Montgomery reduction. - if (m->d[m->top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { - if (!bn_wexpand(r, m->top)) { - goto err; - } - // r = 2^(top*BN_BITS2) - m - r->d[0] = 0 - m->d[0]; - for (int i = 1; i < m->top; i++) { - r->d[i] = ~m->d[i]; - } - r->top = m->top; - // The upper words will be zero if the corresponding words of |m| were - // 0xfff[...], so call |bn_correct_top|. - bn_correct_top(r); - } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) { + if (!bn_one_to_montgomery(r, mont, ctx)) { goto err; } @@ -746,7 +731,6 @@ err: int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a, const BN_ULONG *p, size_t num_p, const BN_MONT_CTX *mont) { - const BN_ULONG *n = mont->N.d; size_t num_n = mont->N.top; if (num_n != num_a || num_n != num_r || num_n > BN_SMALL_MAX_WORDS) { OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); @@ -793,16 +777,7 @@ int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, } } - // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is - // close to R and we subtract rather than perform Montgomery reduction. - if (n[num_n - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { - // r = 2^(top*BN_BITS2) - m - r[0] = 0 - n[0]; - for (size_t i = 1; i < num_n; i++) { - r[i] = ~n[i]; - } - } else if (!bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top, - mont)) { + if (!bn_one_to_montgomery_small(r, num_r, mont)) { goto err; } @@ -1039,8 +1014,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // Allocate a montgomery context if it was not supplied by the caller. if (mont == NULL) { - new_mont = BN_MONT_CTX_new(); - if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { + new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); + if (new_mont == NULL) { goto err; } mont = new_mont; @@ -1118,16 +1093,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, tmp.neg = am.neg = 0; tmp.flags = am.flags = BN_FLG_STATIC_DATA; -// prepare a^0 in Montgomery domain -// by Shay Gueron's suggestion - if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { - // 2^(top*BN_BITS2) - m - tmp.d[0] = 0 - m->d[0]; - for (i = 1; i < top; i++) { - tmp.d[i] = ~m->d[i]; - } - tmp.top = top; - } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) { + if (!bn_one_to_montgomery(&tmp, mont, ctx)) { goto err; } @@ -1365,8 +1331,8 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, // Allocate a montgomery context if it was not supplied by the caller. if (mont == NULL) { - new_mont = BN_MONT_CTX_new(); - if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { + new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); + if (new_mont == NULL) { goto err; } mont = new_mont; diff --git a/src/crypto/fipsmodule/bn/internal.h b/src/crypto/fipsmodule/bn/internal.h index 706e544d..f3b8d8ad 100644 --- a/src/crypto/fipsmodule/bn/internal.h +++ b/src/crypto/fipsmodule/bn/internal.h @@ -197,8 +197,12 @@ extern "C" { #define Hw(t) ((BN_ULONG)((t) >> BN_BITS2)) #endif -// bn_correct_top decrements |bn->top| until |bn->d[top-1]| is non-zero or -// until |top| is zero. If |bn| is zero, |bn->neg| is set to zero. +// bn_minimal_width returns the minimal value of |bn->top| which fits the +// value of |bn|. +int bn_minimal_width(const BIGNUM *bn); + +// bn_correct_top decrements |bn->top| to |bn_minimal_width|. If |bn| is zero, +// |bn->neg| is set to zero. void bn_correct_top(BIGNUM *bn); // bn_wexpand ensures that |bn| has at least |words| works of space without @@ -210,10 +214,26 @@ int bn_wexpand(BIGNUM *bn, size_t words); // than a number of words. int bn_expand(BIGNUM *bn, size_t bits); +// bn_resize_words adjusts |bn->top| to be |words|. It returns one on success +// and zero on allocation error or if |bn|'s value is too large. +// +// Do not call this function outside of unit tests. Most functions currently +// require |BIGNUM|s be minimal. This function breaks that invariant. It is +// introduced early so the invariant may be relaxed incrementally. +int bn_resize_words(BIGNUM *bn, size_t words); + // bn_set_words sets |bn| to the value encoded in the |num| words in |words|, // least significant word first. int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num); +// bn_fits_in_words returns one if |bn| may be represented in |num| words, plus +// a sign bit, and zero otherwise. +int bn_fits_in_words(const BIGNUM *bn, size_t num); + +// bn_copy_words copies the value of |bn| to |out| and returns one if the value +// is representable in |num| words. Otherwise, it returns zero. +int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn); + // bn_mul_add_words multiples |ap| by |w|, adds the result to |rp|, and places // the result in |rp|. |ap| and |rp| must both be |num| words long. It returns // the carry word of the operation. |ap| and |rp| may be equal but otherwise may @@ -326,6 +346,15 @@ int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); // otherwise. int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit); +// bn_one_to_montgomery sets |r| to one in Montgomery form. It returns one on +// success and zero on error. This function treats the bit width of the modulus +// as public. +int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx); + +// bn_less_than_montgomery_R returns one if |bn| is less than the Montgomery R +// value for |mont| and zero otherwise. +int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont); + // Low-level operations for small numbers. // @@ -372,6 +401,13 @@ int bn_to_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a, const BN_MONT_CTX *mont); +// bn_one_to_montgomery_small sets |r| to one in Montgomery form. It returns one +// on success and zero on error. |num_r| must be the length of the modulus, +// which is |mont->N.top|. This function treats the bit width of the modulus as +// public. +int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r, + const BN_MONT_CTX *mont); + // bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs // and outputs are in the Montgomery domain. |num_r| must be the length of the // modulus, which is |mont->N.top|. This function returns one on success and diff --git a/src/crypto/fipsmodule/bn/montgomery.c b/src/crypto/fipsmodule/bn/montgomery.c index e8505dae..a51725c7 100644 --- a/src/crypto/fipsmodule/bn/montgomery.c +++ b/src/crypto/fipsmodule/bn/montgomery.c @@ -126,10 +126,6 @@ #define OPENSSL_BN_ASM_MONT #endif -static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, - const BIGNUM *b, - const BN_MONT_CTX *mont, BN_CTX *ctx); - BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX)); @@ -193,6 +189,10 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); return 0; } + // |mont->N| is always stored minimally. Computing RR efficiently leaks the + // size of the modulus. While the modulus may be private in RSA (one of the + // primes), their sizes are public, so this is fine. + bn_correct_top(&mont->N); // Find n0 such that n0 * N == -1 (mod r). // @@ -200,7 +200,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { // others, we could use a shorter R value and use faster |BN_ULONG|-based // math instead of |uint64_t|-based math, which would be double-precision. // However, currently only the assembler files know which is which. - uint64_t n0 = bn_mont_n0(mod); + uint64_t n0 = bn_mont_n0(&mont->N); mont->n0[0] = (BN_ULONG)n0; #if BN_MONT_CTX_N0_LIMBS == 2 mont->n0[1] = (BN_ULONG)(n0 >> BN_BITS2); @@ -215,7 +215,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { // as |BN_MONT_CTX_N0_LIMBS| is either one or two. // // XXX: This is not constant time with respect to |mont->N|, but it should be. - unsigned lgBigR = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; + unsigned lgBigR = mont->N.top * BN_BITS2; if (!bn_mod_exp_base_2_vartime(&mont->RR, lgBigR * 2, &mont->N)) { return 0; } @@ -223,6 +223,16 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { return 1; } +BN_MONT_CTX *BN_MONT_CTX_new_for_modulus(const BIGNUM *mod, BN_CTX *ctx) { + BN_MONT_CTX *mont = BN_MONT_CTX_new(); + if (mont == NULL || + !BN_MONT_CTX_set(mont, mod, ctx)) { + BN_MONT_CTX_free(mont); + return NULL; + } + return mont; +} + int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock, const BIGNUM *mod, BN_CTX *bn_ctx) { CRYPTO_MUTEX_lock_read(lock); @@ -234,25 +244,12 @@ int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock, } CRYPTO_MUTEX_lock_write(lock); - ctx = *pmont; - if (ctx) { - goto out; - } - - ctx = BN_MONT_CTX_new(); - if (ctx == NULL) { - goto out; + if (*pmont == NULL) { + *pmont = BN_MONT_CTX_new_for_modulus(mod, bn_ctx); } - if (!BN_MONT_CTX_set(ctx, mod, bn_ctx)) { - BN_MONT_CTX_free(ctx); - ctx = NULL; - goto out; - } - *pmont = ctx; - -out: + const int ok = *pmont != NULL; CRYPTO_MUTEX_unlock_write(lock); - return ctx != NULL; + return ok; } int BN_to_montgomery(BIGNUM *ret, const BIGNUM *a, const BN_MONT_CTX *mont, @@ -304,6 +301,11 @@ static int bn_from_montgomery_in_place(BN_ULONG *r, size_t num_r, BN_ULONG *a, static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BN_MONT_CTX *mont) { + if (r->neg) { + OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); + return 0; + } + const BIGNUM *n = &mont->N; if (n->top == 0) { ret->top = 0; @@ -311,21 +313,16 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, } int max = (2 * n->top); // carry is stored separately - if (!bn_wexpand(r, max) || + if (!bn_resize_words(r, max) || !bn_wexpand(ret, n->top)) { return 0; } - // Clear the top words of |r|. - if (max > r->top) { - OPENSSL_memset(r->d + r->top, 0, (max - r->top) * sizeof(BN_ULONG)); - } - r->top = max; ret->top = n->top; if (!bn_from_montgomery_in_place(ret->d, ret->top, r->d, r->top, mont)) { return 0; } - ret->neg = r->neg; + ret->neg = 0; bn_correct_top(r); bn_correct_top(ret); @@ -352,35 +349,27 @@ err: return ret; } -int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BN_MONT_CTX *mont, BN_CTX *ctx) { -#if !defined(OPENSSL_BN_ASM_MONT) - return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx); -#else - int num = mont->N.top; - - // |bn_mul_mont| requires at least 128 bits of limbs, at least for x86. - if (num < (128 / BN_BITS2) || - a->top != num || - b->top != num) { - return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx); - } - - if (!bn_wexpand(r, num)) { - return 0; - } - if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { - // The check above ensures this won't happen. - assert(0); - OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); - return 0; +int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) { + // If the high bit of |n| is set, R = 2^(top*BN_BITS2) < 2 * |n|, so we + // compute R - |n| rather than perform Montgomery reduction. + const BIGNUM *n = &mont->N; + if (n->top > 0 && (n->d[n->top - 1] >> (BN_BITS2 - 1)) != 0) { + if (!bn_wexpand(r, n->top)) { + return 0; + } + r->d[0] = 0 - n->d[0]; + for (int i = 1; i < n->top; i++) { + r->d[i] = ~n->d[i]; + } + r->top = n->top; + r->neg = 0; + // The upper words will be zero if the corresponding words of |n| were + // 0xfff[...], so call |bn_correct_top|. + bn_correct_top(r); + return 1; } - r->neg = a->neg ^ b->neg; - r->top = num; - bn_correct_top(r); - return 1; -#endif + return BN_from_montgomery(r, &mont->RR, mont, ctx); } static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, @@ -417,6 +406,44 @@ err: return ret; } +int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BN_MONT_CTX *mont, BN_CTX *ctx) { + if (a->neg || b->neg) { + OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER); + return 0; + } + +#if defined(OPENSSL_BN_ASM_MONT) + // |bn_mul_mont| requires at least 128 bits of limbs, at least for x86. + int num = mont->N.top; + if (num >= (128 / BN_BITS2) && + a->top == num && + b->top == num) { + if (!bn_wexpand(r, num)) { + return 0; + } + if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { + // The check above ensures this won't happen. + assert(0); + OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); + return 0; + } + r->neg = 0; + r->top = num; + bn_correct_top(r); + + return 1; + } +#endif + + return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx); +} + +int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont) { + return !BN_is_negative(bn) && + bn_fits_in_words(bn, mont->N.top); +} + int bn_to_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a, const BN_MONT_CTX *mont) { return bn_mod_mul_montgomery_small(r, num_r, a, num_a, mont->RR.d, @@ -439,6 +466,28 @@ int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, return ret; } +int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r, + const BN_MONT_CTX *mont) { + const BN_ULONG *n = mont->N.d; + size_t num_n = mont->N.top; + if (num_n == 0 || num_r != num_n) { + OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; + } + + // If the high bit of |n| is set, R = 2^(num_n*BN_BITS2) < 2 * |n|, so we + // compute R - |n| rather than perform Montgomery reduction. + if (num_n > 0 && (n[num_n - 1] >> (BN_BITS2 - 1)) != 0) { + r[0] = 0 - n[0]; + for (size_t i = 1; i < num_n; i++) { + r[i] = ~n[i]; + } + return 1; + } + + return bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top, mont); +} + int bn_mod_mul_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a, const BN_ULONG *b, size_t num_b, const BN_MONT_CTX *mont) { diff --git a/src/crypto/fipsmodule/bn/prime.c b/src/crypto/fipsmodule/bn/prime.c index 691d0cba..a291f7a0 100644 --- a/src/crypto/fipsmodule/bn/prime.c +++ b/src/crypto/fipsmodule/bn/prime.c @@ -586,9 +586,8 @@ int BN_enhanced_miller_rabin_primality_test( } // Montgomery setup for computations mod A - mont = BN_MONT_CTX_new(); - if (mont == NULL || - !BN_MONT_CTX_set(mont, w, ctx)) { + mont = BN_MONT_CTX_new_for_modulus(w, ctx); + if (mont == NULL) { goto err; } diff --git a/src/crypto/fipsmodule/ec/ec.c b/src/crypto/fipsmodule/ec/ec.c index c9687a61..616df162 100644 --- a/src/crypto/fipsmodule/ec/ec.c +++ b/src/crypto/fipsmodule/ec/ec.c @@ -389,9 +389,8 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, } BN_MONT_CTX_free(group->order_mont); - group->order_mont = BN_MONT_CTX_new(); - if (group->order_mont == NULL || - !BN_MONT_CTX_set(group->order_mont, &group->order, NULL)) { + group->order_mont = BN_MONT_CTX_new_for_modulus(&group->order, NULL); + if (group->order_mont == NULL) { return 0; } @@ -448,9 +447,8 @@ static EC_GROUP *ec_group_new_from_data(const struct built_in_curve *curve) { goto err; } - group->order_mont = BN_MONT_CTX_new(); - if (group->order_mont == NULL || - !BN_MONT_CTX_set(group->order_mont, &group->order, ctx)) { + group->order_mont = BN_MONT_CTX_new_for_modulus(&group->order, ctx); + if (group->order_mont == NULL) { OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB); goto err; } @@ -768,6 +766,9 @@ int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *point, } if (!EC_POINT_is_on_curve(group, point, ctx)) { + // In the event of an error, defend against the caller not checking the + // return value by setting a known safe value: the base point. + EC_POINT_copy(point, EC_GROUP_get0_generator(group)); OPENSSL_PUT_ERROR(EC, EC_R_POINT_IS_NOT_ON_CURVE); return 0; } @@ -952,12 +953,10 @@ int ec_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out, int ec_bignum_to_scalar_unchecked(const EC_GROUP *group, EC_SCALAR *out, const BIGNUM *in) { - if (BN_is_negative(in) || in->top > group->order.top) { + if (!bn_copy_words(out->words, group->order.top, in)) { OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR); return 0; } - OPENSSL_memset(out->words, 0, group->order.top * sizeof(BN_ULONG)); - OPENSSL_memcpy(out->words, in->d, in->top * sizeof(BN_ULONG)); return 1; } diff --git a/src/crypto/fipsmodule/ec/ec_montgomery.c b/src/crypto/fipsmodule/ec/ec_montgomery.c index 898cf07a..165c06f1 100644 --- a/src/crypto/fipsmodule/ec/ec_montgomery.c +++ b/src/crypto/fipsmodule/ec/ec_montgomery.c @@ -93,7 +93,6 @@ void ec_GFp_mont_group_finish(EC_GROUP *group) { int ec_GFp_mont_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { BN_CTX *new_ctx = NULL; - BN_MONT_CTX *mont = NULL; int ret = 0; BN_MONT_CTX_free(group->mont); @@ -106,18 +105,12 @@ int ec_GFp_mont_group_set_curve(EC_GROUP *group, const BIGNUM *p, } } - mont = BN_MONT_CTX_new(); - if (mont == NULL) { - goto err; - } - if (!BN_MONT_CTX_set(mont, p, ctx)) { + group->mont = BN_MONT_CTX_new_for_modulus(p, ctx); + if (group->mont == NULL) { OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB); goto err; } - group->mont = mont; - mont = NULL; - ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); if (!ret) { @@ -127,7 +120,6 @@ int ec_GFp_mont_group_set_curve(EC_GROUP *group, const BIGNUM *p, err: BN_CTX_free(new_ctx); - BN_MONT_CTX_free(mont); return ret; } diff --git a/src/crypto/fipsmodule/ec/ec_test.cc b/src/crypto/fipsmodule/ec/ec_test.cc index e69f8d72..8a215e91 100644 --- a/src/crypto/fipsmodule/ec/ec_test.cc +++ b/src/crypto/fipsmodule/ec/ec_test.cc @@ -28,6 +28,7 @@ #include <openssl/nid.h> #include <openssl/obj.h> +#include "../bn/internal.h" #include "../../test/test_util.h" @@ -553,6 +554,32 @@ TEST_P(ECCurveTest, Mul) { EXPECT_EQ(0, EC_POINT_cmp(group.get(), result.get(), generator, nullptr)); } +#if !defined(BORINGSSL_SHARED_LIBRARY) +TEST_P(ECCurveTest, MulNonMinimal) { + bssl::UniquePtr<EC_GROUP> group(EC_GROUP_new_by_curve_name(GetParam().nid)); + ASSERT_TRUE(group); + + bssl::UniquePtr<BIGNUM> forty_two(BN_new()); + ASSERT_TRUE(forty_two); + ASSERT_TRUE(BN_set_word(forty_two.get(), 42)); + + // Compute g × 42. + bssl::UniquePtr<EC_POINT> point(EC_POINT_new(group.get())); + ASSERT_TRUE(point); + ASSERT_TRUE(EC_POINT_mul(group.get(), point.get(), forty_two.get(), nullptr, + nullptr, nullptr)); + + // Compute it again with a non-minimal 42, much larger than the scalar. + ASSERT_TRUE(bn_resize_words(forty_two.get(), 64)); + + bssl::UniquePtr<EC_POINT> point2(EC_POINT_new(group.get())); + ASSERT_TRUE(point2); + ASSERT_TRUE(EC_POINT_mul(group.get(), point2.get(), forty_two.get(), nullptr, + nullptr, nullptr)); + EXPECT_EQ(0, EC_POINT_cmp(group.get(), point.get(), point2.get(), nullptr)); +} +#endif // BORINGSSL_SHARED_LIBRARY + // Test that EC_KEY_set_private_key rejects invalid values. TEST_P(ECCurveTest, SetInvalidPrivateKey) { bssl::UniquePtr<EC_KEY> key(EC_KEY_new_by_curve_name(GetParam().nid)); @@ -572,6 +599,43 @@ TEST_P(ECCurveTest, SetInvalidPrivateKey) { ERR_clear_error(); } +TEST_P(ECCurveTest, IgnoreOct2PointReturnValue) { + bssl::UniquePtr<EC_GROUP> group(EC_GROUP_new_by_curve_name(GetParam().nid)); + ASSERT_TRUE(group); + + bssl::UniquePtr<BIGNUM> forty_two(BN_new()); + ASSERT_TRUE(forty_two); + ASSERT_TRUE(BN_set_word(forty_two.get(), 42)); + + // Compute g × 42. + bssl::UniquePtr<EC_POINT> point(EC_POINT_new(group.get())); + ASSERT_TRUE(point); + ASSERT_TRUE(EC_POINT_mul(group.get(), point.get(), forty_two.get(), nullptr, + nullptr, nullptr)); + + // Serialize the point. + size_t serialized_len = + EC_POINT_point2oct(group.get(), point.get(), + POINT_CONVERSION_UNCOMPRESSED, nullptr, 0, nullptr); + ASSERT_NE(0u, serialized_len); + + std::vector<uint8_t> serialized(serialized_len); + ASSERT_EQ(serialized_len, + EC_POINT_point2oct(group.get(), point.get(), + POINT_CONVERSION_UNCOMPRESSED, serialized.data(), + serialized_len, nullptr)); + + // Create a serialized point that is not on the curve. + serialized[serialized_len - 1]++; + + ASSERT_FALSE(EC_POINT_oct2point(group.get(), point.get(), serialized.data(), + serialized.size(), nullptr)); + // After a failure, |point| should have been set to the generator to defend + // against code that doesn't check the return value. + ASSERT_EQ(0, EC_POINT_cmp(group.get(), point.get(), + EC_GROUP_get0_generator(group.get()), nullptr)); +} + static std::vector<EC_builtin_curve> AllCurves() { const size_t num_curves = EC_get_builtin_curves(nullptr, 0); std::vector<EC_builtin_curve> curves(num_curves); diff --git a/src/crypto/fipsmodule/ec/oct.c b/src/crypto/fipsmodule/ec/oct.c index 96c138a1..3a6b4dd3 100644 --- a/src/crypto/fipsmodule/ec/oct.c +++ b/src/crypto/fipsmodule/ec/oct.c @@ -77,11 +77,9 @@ static size_t ec_GFp_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, uint8_t *buf, size_t len, BN_CTX *ctx) { - size_t ret; + size_t ret = 0; BN_CTX *new_ctx = NULL; int used_ctx = 0; - BIGNUM *x, *y; - size_t field_len, i; if ((form != POINT_CONVERSION_COMPRESSED) && (form != POINT_CONVERSION_UNCOMPRESSED)) { @@ -94,14 +92,16 @@ static size_t ec_GFp_simple_point2oct(const EC_GROUP *group, goto err; } - // ret := required output buffer length - field_len = BN_num_bytes(&group->field); - ret = - (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2 * field_len; + const size_t field_len = BN_num_bytes(&group->field); + size_t output_len = 1 /* type byte */ + field_len; + if (form == POINT_CONVERSION_UNCOMPRESSED) { + // Uncompressed points have a second coordinate. + output_len += field_len; + } // if 'buf' is NULL, just return required length if (buf != NULL) { - if (len < ret) { + if (len < output_len) { OPENSSL_PUT_ERROR(EC, EC_R_BUFFER_TOO_SMALL); goto err; } @@ -115,8 +115,8 @@ static size_t ec_GFp_simple_point2oct(const EC_GROUP *group, BN_CTX_start(ctx); used_ctx = 1; - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); + BIGNUM *x = BN_CTX_get(ctx); + BIGNUM *y = BN_CTX_get(ctx); if (y == NULL) { goto err; } @@ -131,7 +131,7 @@ static size_t ec_GFp_simple_point2oct(const EC_GROUP *group, } else { buf[0] = form; } - i = 1; + size_t i = 1; if (!BN_bn2bin_padded(buf + i, field_len, x)) { OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR); @@ -147,70 +147,66 @@ static size_t ec_GFp_simple_point2oct(const EC_GROUP *group, i += field_len; } - if (i != ret) { + if (i != output_len) { OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR); goto err; } } - if (used_ctx) { - BN_CTX_end(ctx); - } - BN_CTX_free(new_ctx); - return ret; + ret = output_len; err: if (used_ctx) { BN_CTX_end(ctx); } BN_CTX_free(new_ctx); - return 0; + return ret; } - static int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point, const uint8_t *buf, size_t len, BN_CTX *ctx) { - point_conversion_form_t form; - int y_bit; BN_CTX *new_ctx = NULL; - BIGNUM *x, *y; - size_t field_len, enc_len; - int ret = 0; + int ret = 0, used_ctx = 0; if (len == 0) { OPENSSL_PUT_ERROR(EC, EC_R_BUFFER_TOO_SMALL); - return 0; + goto err; } - form = buf[0]; - y_bit = form & 1; + + point_conversion_form_t form = buf[0]; + const int y_bit = form & 1; form = form & ~1U; if ((form != POINT_CONVERSION_COMPRESSED && form != POINT_CONVERSION_UNCOMPRESSED) || (form == POINT_CONVERSION_UNCOMPRESSED && y_bit)) { OPENSSL_PUT_ERROR(EC, EC_R_INVALID_ENCODING); - return 0; + goto err; } - field_len = BN_num_bytes(&group->field); - enc_len = - (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2 * field_len; + const size_t field_len = BN_num_bytes(&group->field); + size_t enc_len = 1 /* type byte */ + field_len; + if (form == POINT_CONVERSION_UNCOMPRESSED) { + // Uncompressed points have a second coordinate. + enc_len += field_len; + } if (len != enc_len) { OPENSSL_PUT_ERROR(EC, EC_R_INVALID_ENCODING); - return 0; + goto err; } if (ctx == NULL) { ctx = new_ctx = BN_CTX_new(); if (ctx == NULL) { - return 0; + goto err; } } BN_CTX_start(ctx); - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); + used_ctx = 1; + BIGNUM *x = BN_CTX_get(ctx); + BIGNUM *y = BN_CTX_get(ctx); if (x == NULL || y == NULL) { goto err; } @@ -244,7 +240,9 @@ static int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point, ret = 1; err: - BN_CTX_end(ctx); + if (used_ctx) { + BN_CTX_end(ctx); + } BN_CTX_free(new_ctx); return ret; } diff --git a/src/crypto/fipsmodule/ec/p256-x86_64.c b/src/crypto/fipsmodule/ec/p256-x86_64.c index 0e79b6dc..ec371bf0 100644 --- a/src/crypto/fipsmodule/ec/p256-x86_64.c +++ b/src/crypto/fipsmodule/ec/p256-x86_64.c @@ -205,13 +205,7 @@ static void ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS], // returns one if it fits. Otherwise it returns zero. static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], const BIGNUM *in) { - if (in->top > P256_LIMBS) { - return 0; - } - - OPENSSL_memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS); - OPENSSL_memcpy(out, in->d, sizeof(BN_ULONG) * in->top); - return 1; + return bn_copy_words(out, P256_LIMBS, in); } // r = p * p_scalar diff --git a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc index a802bfb5..5cd701ba 100644 --- a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc +++ b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc @@ -160,17 +160,16 @@ static bool PointToAffine(P256_POINT_AFFINE *out, const P256_POINT *in) { return false; } - OPENSSL_memset(out, 0, sizeof(P256_POINT_AFFINE)); - if (BN_is_zero(z.get())) { // The point at infinity is represented as (0, 0). + OPENSSL_memset(out, 0, sizeof(P256_POINT_AFFINE)); return true; } bssl::UniquePtr<BN_CTX> ctx(BN_CTX_new()); - bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new()); + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(p.get(), ctx.get())); if (!ctx || !mont || - !BN_MONT_CTX_set(mont.get(), p.get(), ctx.get()) || // Invert Z. !BN_from_montgomery(z.get(), z.get(), mont.get(), ctx.get()) || !BN_mod_inverse(z.get(), z.get(), p.get(), ctx.get()) || @@ -185,12 +184,11 @@ static bool PointToAffine(P256_POINT_AFFINE *out, const P256_POINT *in) { !BN_mod_mul_montgomery(y.get(), y.get(), z.get(), mont.get(), ctx.get()) || !BN_mod_mul_montgomery(y.get(), y.get(), z.get(), mont.get(), - ctx.get())) { + ctx.get()) || + !bn_copy_words(out->X, P256_LIMBS, x.get()) || + !bn_copy_words(out->Y, P256_LIMBS, y.get())) { return false; } - - OPENSSL_memcpy(out->X, x->d, sizeof(BN_ULONG) * x->top); - OPENSSL_memcpy(out->Y, y->d, sizeof(BN_ULONG) * y->top); return true; } diff --git a/src/crypto/fipsmodule/rsa/rsa_impl.c b/src/crypto/fipsmodule/rsa/rsa_impl.c index b5a4e515..626bbe85 100644 --- a/src/crypto/fipsmodule/rsa/rsa_impl.c +++ b/src/crypto/fipsmodule/rsa/rsa_impl.c @@ -646,12 +646,11 @@ err: static int mod_montgomery(BIGNUM *r, const BIGNUM *I, const BIGNUM *p, const BN_MONT_CTX *mont_p, const BIGNUM *q, BN_CTX *ctx) { - // Reduce in constant time with Montgomery reduction, which requires I <= p * - // R. If p and q are the same size, which is true for any RSA keys we or - // anyone sane generates, we have q < R and I < p * q, so this holds. - // - // If q is too big, fall back to |BN_mod|. - if (q->top > p->top) { + // Reducing in constant-time with Montgomery reduction requires I <= p * R. We + // have I < p * q, so this follows if q < R. In particular, this always holds + // if p and q are the same size, which is true for any RSA keys we or anyone + // sane generates. For other keys, we fall back to |BN_mod|. + if (!bn_less_than_montgomery_R(q, mont_p)) { return BN_mod(r, I, p, ctx); } @@ -838,7 +837,8 @@ int rsa_greater_than_pow2(const BIGNUM *b, int n) { // relatively prime to |e|. If |p| is non-NULL, |out| will also not be close to // |p|. static int generate_prime(BIGNUM *out, int bits, const BIGNUM *e, - const BIGNUM *p, BN_CTX *ctx, BN_GENCB *cb) { + const BIGNUM *p, const BIGNUM *sqrt2, BN_CTX *ctx, + BN_GENCB *cb) { if (bits < 128 || (bits % BN_BITS2) != 0) { OPENSSL_PUT_ERROR(RSA, ERR_R_INTERNAL_ERROR); return 0; @@ -882,30 +882,14 @@ static int generate_prime(BIGNUM *out, int bits, const BIGNUM *e, } } - // If out < 2^(bits-1)×√2, try again (steps 4.4 and 5.5). - // - // We check the most significant words, so we retry if ⌊out/2^k⌋ <= ⌊b/2^k⌋, - // where b = 2^(bits-1)×√2 and k = max(0, bits - 1536). For key sizes up to - // 3072 (bits = 1536), k = 0, so we are testing that ⌊out⌋ <= ⌊b⌋. out is an - // integer and b is not, so this is equivalent to out < b. That is, the - // comparison is exact for FIPS key sizes. + // If out < 2^(bits-1)×√2, try again (steps 4.4 and 5.5). This is equivalent + // to out <= ⌊2^(bits-1)×√2⌋, or out <= sqrt2 for FIPS key sizes. // // For larger keys, the comparison is approximate, leaning towards // retrying. That is, we reject a negligible fraction of primes that are // within the FIPS bound, but we will never accept a prime outside the - // bound, ensuring the resulting RSA key is the right size. Specifically, if - // the FIPS bound holds, we have ⌊out/2^k⌋ < out/2^k < b/2^k. This implies - // ⌊out/2^k⌋ <= ⌊b/2^k⌋. That is, the FIPS bound implies our bound and so we - // are slightly tighter. - size_t out_len = (size_t)out->top; - assert(out_len == (size_t)bits / BN_BITS2); - size_t to_check = kBoringSSLRSASqrtTwoLen; - if (to_check > out_len) { - to_check = out_len; - } - if (!bn_less_than_words( - kBoringSSLRSASqrtTwo + kBoringSSLRSASqrtTwoLen - to_check, - out->d + out_len - to_check, to_check)) { + // bound, ensuring the resulting RSA key is the right size. + if (!BN_less_than_consttime(sqrt2, out)) { continue; } @@ -969,7 +953,9 @@ int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) { BIGNUM *pm1 = BN_CTX_get(ctx); BIGNUM *qm1 = BN_CTX_get(ctx); BIGNUM *gcd = BN_CTX_get(ctx); - if (totient == NULL || pm1 == NULL || qm1 == NULL || gcd == NULL) { + BIGNUM *sqrt2 = BN_CTX_get(ctx); + if (totient == NULL || pm1 == NULL || qm1 == NULL || gcd == NULL || + sqrt2 == NULL) { goto bn_err; } @@ -990,12 +976,35 @@ int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) { } int prime_bits = bits / 2; + + // Compute sqrt2 >= ⌊2^(prime_bits-1)×√2⌋. + if (!bn_set_words(sqrt2, kBoringSSLRSASqrtTwo, kBoringSSLRSASqrtTwoLen)) { + goto bn_err; + } + int sqrt2_bits = kBoringSSLRSASqrtTwoLen * BN_BITS2; + assert(sqrt2_bits == (int)BN_num_bits(sqrt2)); + if (sqrt2_bits > prime_bits) { + // For key sizes up to 3072 (prime_bits = 1536), this is exactly + // ⌊2^(prime_bits-1)×√2⌋. + if (!BN_rshift(sqrt2, sqrt2, sqrt2_bits - prime_bits)) { + goto bn_err; + } + } else if (prime_bits > sqrt2_bits) { + // For key sizes beyond 3072, this is approximate. We err towards retrying + // to ensure our key is the right size and round up. + if (!BN_add_word(sqrt2, 1) || + !BN_lshift(sqrt2, sqrt2, prime_bits - sqrt2_bits)) { + goto bn_err; + } + } + assert(prime_bits == (int)BN_num_bits(sqrt2)); + do { // Generate p and q, each of size |prime_bits|, using the steps outlined in // appendix FIPS 186-4 appendix B.3.3. - if (!generate_prime(rsa->p, prime_bits, rsa->e, NULL, ctx, cb) || + if (!generate_prime(rsa->p, prime_bits, rsa->e, NULL, sqrt2, ctx, cb) || !BN_GENCB_call(cb, 3, 0) || - !generate_prime(rsa->q, prime_bits, rsa->e, rsa->p, ctx, cb) || + !generate_prime(rsa->q, prime_bits, rsa->e, rsa->p, sqrt2, ctx, cb) || !BN_GENCB_call(cb, 3, 1)) { goto bn_err; } |