diff options
author | Pete Bentley <prb@google.com> | 2022-04-11 08:45:47 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2022-04-11 08:45:47 +0000 |
commit | ea823ff68de78c10d2545416043e6d4e77b01204 (patch) | |
tree | 1f337973194ebe2e527c0085a51899e13686b0a6 /src/third_party/fiat/p256_64.h | |
parent | fe406ed0f9fc6f5f2b0d02b1d0bc8c5c898d395a (diff) | |
parent | b439634923935b5baf32358442528b413ea9512f (diff) | |
download | boringssl-ea823ff68de78c10d2545416043e6d4e77b01204.tar.gz |
Merge "external/boringssl: Sync to c9a7dd687987666df5910f2b35fdc8c3d1e5ed05."
Diffstat (limited to 'src/third_party/fiat/p256_64.h')
-rw-r--r-- | src/third_party/fiat/p256_64.h | 1759 |
1 files changed, 1272 insertions, 487 deletions
diff --git a/src/third_party/fiat/p256_64.h b/src/third_party/fiat/p256_64.h index 773266a0..c7726384 100644 --- a/src/third_party/fiat/p256_64.h +++ b/src/third_party/fiat/p256_64.h @@ -1,8 +1,8 @@ -/* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 64 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */ +/* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */ /* curve description: p256 */ -/* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */ -/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */ /* machine_wordsize = 64 (from "64") */ +/* requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp */ +/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */ /* */ /* NOTE: In addition to the bounds specified above each function, all */ /* functions synthesized for this Montgomery arithmetic require the */ @@ -10,20 +10,52 @@ /* require the input to be in the unique saturated representation. */ /* All functions also ensure that these two properties are true of */ /* return values. 
*/ +/* */ +/* Computed values: */ +/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */ +/* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in */ +/* if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */ #include <stdint.h> typedef unsigned char fiat_p256_uint1; typedef signed char fiat_p256_int1; -typedef signed __int128 fiat_p256_int128; -typedef unsigned __int128 fiat_p256_uint128; +#if defined(__GNUC__) || defined(__clang__) +# define FIAT_P256_FIAT_EXTENSION __extension__ +# define FIAT_P256_FIAT_INLINE __inline__ +#else +# define FIAT_P256_FIAT_EXTENSION +# define FIAT_P256_FIAT_INLINE +#endif + +FIAT_P256_FIAT_EXTENSION typedef signed __int128 fiat_p256_int128; +FIAT_P256_FIAT_EXTENSION typedef unsigned __int128 fiat_p256_uint128; + +/* The type fiat_p256_montgomery_domain_field_element is a field element in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_p256_montgomery_domain_field_element[4]; + +/* The type fiat_p256_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. 
*/ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_p256_non_montgomery_domain_field_element[4]; #if (-1 & 3) != 3 #error "This code only works on a two's complement system" #endif +#if !defined(FIAT_P256_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +static __inline__ uint64_t fiat_p256_value_barrier_u64(uint64_t a) { + __asm__("" : "+r"(a) : /* no inputs */); + return a; +} +#else +# define fiat_p256_value_barrier_u64(x) (x) +#endif + /* * The function fiat_p256_addcarryx_u64 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^64 * out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋ @@ -36,16 +68,20 @@ typedef unsigned __int128 fiat_p256_uint128; * out1: [0x0 ~> 0xffffffffffffffff] * out2: [0x0 ~> 0x1] */ -static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { - fiat_p256_uint128 x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3); - uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); - fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 64); +static FIAT_P256_FIAT_INLINE void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { + fiat_p256_uint128 x1; + uint64_t x2; + fiat_p256_uint1 x3; + x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3); + x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); + x3 = (fiat_p256_uint1)(x1 >> 64); *out1 = x2; *out2 = x3; } /* * The function fiat_p256_subborrowx_u64 is a subtraction with borrow. 
+ * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^64 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋ @@ -58,16 +94,20 @@ static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_ * out1: [0x0 ~> 0xffffffffffffffff] * out2: [0x0 ~> 0x1] */ -static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { - fiat_p256_int128 x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3); - fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 64); - uint64_t x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); +static FIAT_P256_FIAT_INLINE void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { + fiat_p256_int128 x1; + fiat_p256_int1 x2; + uint64_t x3; + x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3); + x2 = (fiat_p256_int1)(x1 >> 64); + x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); *out1 = x3; *out2 = (fiat_p256_uint1)(0x0 - x2); } /* * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result. + * * Postconditions: * out1 = (arg1 * arg2) mod 2^64 * out2 = ⌊arg1 * arg2 / 2^64⌋ @@ -79,16 +119,20 @@ static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat * out1: [0x0 ~> 0xffffffffffffffff] * out2: [0x0 ~> 0xffffffffffffffff] */ -static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) { - fiat_p256_uint128 x1 = ((fiat_p256_uint128)arg1 * arg2); - uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); - uint64_t x3 = (uint64_t)(x1 >> 64); +static FIAT_P256_FIAT_INLINE void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) { + fiat_p256_uint128 x1; + uint64_t x2; + uint64_t x3; + x1 = ((fiat_p256_uint128)arg1 * arg2); + x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff)); + x3 = (uint64_t)(x1 >> 64); *out1 = x2; *out2 = x3; } /* * The function fiat_p256_cmovznz_u64 is a single-word conditional move. 
+ * * Postconditions: * out1 = (if arg1 = 0 then arg2 else arg3) * @@ -99,21 +143,19 @@ static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, ui * Output Bounds: * out1: [0x0 ~> 0xffffffffffffffff] */ -static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { - fiat_p256_uint1 x1 = (!(!arg1)); - uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); - // Note this line has been patched from the synthesized code to add value - // barriers. - // - // Clang recognizes this pattern as a select. While it usually transforms it - // to a cmov, it sometimes further transforms it into a branch, which we do - // not want. - uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2)); +static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { + fiat_p256_uint1 x1; + uint64_t x2; + uint64_t x3; + x1 = (!(!arg1)); + x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); + x3 = ((fiat_p256_value_barrier_u64(x2) & arg3) | (fiat_p256_value_barrier_u64((~x2)) & arg2)); *out1 = x3; } /* * The function fiat_p256_mul multiplies two field elements in the Montgomery domain. 
+ * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -121,287 +163,297 @@ static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) { - uint64_t x1 = (arg1[1]); - uint64_t x2 = (arg1[2]); - uint64_t x3 = (arg1[3]); - uint64_t x4 = (arg1[0]); +static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) { + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; uint64_t x5; uint64_t x6; - fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3])); uint64_t x7; uint64_t x8; - fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2])); uint64_t x9; uint64_t x10; - fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1])); uint64_t x11; uint64_t x12; - fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0])); uint64_t x13; fiat_p256_uint1 x14; - fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); uint64_t x15; fiat_p256_uint1 x16; - fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7); uint64_t x17; fiat_p256_uint1 x18; - fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5); - uint64_t x19 = (x18 + x6); + uint64_t x19; uint64_t x20; uint64_t x21; - fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001)); uint64_t x22; uint64_t x23; - fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff)); uint64_t x24; uint64_t 
x25; - fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff)); uint64_t x26; fiat_p256_uint1 x27; - fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22); - uint64_t x28 = (x27 + x23); + uint64_t x28; uint64_t x29; fiat_p256_uint1 x30; - fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24); uint64_t x31; fiat_p256_uint1 x32; - fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26); uint64_t x33; fiat_p256_uint1 x34; - fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28); uint64_t x35; fiat_p256_uint1 x36; - fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20); uint64_t x37; fiat_p256_uint1 x38; - fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21); uint64_t x39; uint64_t x40; - fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3])); uint64_t x41; uint64_t x42; - fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2])); uint64_t x43; uint64_t x44; - fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1])); uint64_t x45; uint64_t x46; - fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0])); uint64_t x47; fiat_p256_uint1 x48; - fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43); uint64_t x49; fiat_p256_uint1 x50; - fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41); uint64_t x51; fiat_p256_uint1 x52; - fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39); - uint64_t x53 = (x52 + x40); + uint64_t x53; uint64_t x54; fiat_p256_uint1 x55; - fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45); uint64_t x56; fiat_p256_uint1 x57; - fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47); uint64_t x58; fiat_p256_uint1 x59; - fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49); uint64_t x60; fiat_p256_uint1 x61; - fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51); uint64_t x62; fiat_p256_uint1 x63; - fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53); uint64_t x64; uint64_t x65; - fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001)); uint64_t x66; uint64_t x67; - fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff)); uint64_t x68; uint64_t x69; - fiat_p256_mulx_u64(&x68, &x69, x54, 
UINT64_C(0xffffffffffffffff)); uint64_t x70; fiat_p256_uint1 x71; - fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66); - uint64_t x72 = (x71 + x67); + uint64_t x72; uint64_t x73; fiat_p256_uint1 x74; - fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68); uint64_t x75; fiat_p256_uint1 x76; - fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70); uint64_t x77; fiat_p256_uint1 x78; - fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72); uint64_t x79; fiat_p256_uint1 x80; - fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64); uint64_t x81; fiat_p256_uint1 x82; - fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65); - uint64_t x83 = ((uint64_t)x82 + x63); + uint64_t x83; uint64_t x84; uint64_t x85; - fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3])); uint64_t x86; uint64_t x87; - fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2])); uint64_t x88; uint64_t x89; - fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1])); uint64_t x90; uint64_t x91; - fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0])); uint64_t x92; fiat_p256_uint1 x93; - fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88); uint64_t x94; fiat_p256_uint1 x95; - fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86); uint64_t x96; fiat_p256_uint1 x97; - fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84); - uint64_t x98 = (x97 + x85); + uint64_t x98; uint64_t x99; fiat_p256_uint1 x100; - fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90); uint64_t x101; fiat_p256_uint1 x102; - fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92); uint64_t x103; fiat_p256_uint1 x104; - fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94); uint64_t x105; fiat_p256_uint1 x106; - fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96); uint64_t x107; fiat_p256_uint1 x108; - fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98); uint64_t x109; uint64_t x110; - fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001)); uint64_t x111; uint64_t x112; - fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff)); uint64_t x113; uint64_t x114; - 
fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff)); uint64_t x115; fiat_p256_uint1 x116; - fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111); - uint64_t x117 = (x116 + x112); + uint64_t x117; uint64_t x118; fiat_p256_uint1 x119; - fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113); uint64_t x120; fiat_p256_uint1 x121; - fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115); uint64_t x122; fiat_p256_uint1 x123; - fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117); uint64_t x124; fiat_p256_uint1 x125; - fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109); uint64_t x126; fiat_p256_uint1 x127; - fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110); - uint64_t x128 = ((uint64_t)x127 + x108); + uint64_t x128; uint64_t x129; uint64_t x130; - fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3])); uint64_t x131; uint64_t x132; - fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2])); uint64_t x133; uint64_t x134; - fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1])); uint64_t x135; uint64_t x136; - fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0])); uint64_t x137; fiat_p256_uint1 x138; - fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133); uint64_t x139; fiat_p256_uint1 x140; - fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131); uint64_t x141; fiat_p256_uint1 x142; - fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129); - uint64_t x143 = (x142 + x130); + uint64_t x143; uint64_t x144; fiat_p256_uint1 x145; - fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135); uint64_t x146; fiat_p256_uint1 x147; - fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137); uint64_t x148; fiat_p256_uint1 x149; - fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139); uint64_t x150; fiat_p256_uint1 x151; - fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141); uint64_t x152; fiat_p256_uint1 x153; - fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143); uint64_t x154; uint64_t x155; - fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001)); 
uint64_t x156; uint64_t x157; - fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff)); uint64_t x158; uint64_t x159; - fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff)); uint64_t x160; fiat_p256_uint1 x161; - fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156); - uint64_t x162 = (x161 + x157); + uint64_t x162; uint64_t x163; fiat_p256_uint1 x164; - fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158); uint64_t x165; fiat_p256_uint1 x166; - fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160); uint64_t x167; fiat_p256_uint1 x168; - fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162); uint64_t x169; fiat_p256_uint1 x170; - fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154); uint64_t x171; fiat_p256_uint1 x172; - fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155); - uint64_t x173 = ((uint64_t)x172 + x153); + uint64_t x173; uint64_t x174; fiat_p256_uint1 x175; - fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff)); uint64_t x176; fiat_p256_uint1 x177; - fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff)); uint64_t x178; fiat_p256_uint1 x179; - fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0); uint64_t x180; fiat_p256_uint1 x181; - fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001)); uint64_t x182; fiat_p256_uint1 x183; - fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0); uint64_t x184; - fiat_p256_cmovznz_u64(&x184, x183, x174, x165); uint64_t x185; - fiat_p256_cmovznz_u64(&x185, x183, x176, x167); uint64_t x186; - fiat_p256_cmovznz_u64(&x186, x183, x178, x169); uint64_t x187; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[0]); + fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3])); + fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2])); + fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1])); + fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0])); + fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); + fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, 
x7); + fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5); + x19 = (x18 + x6); + fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22); + x28 = (x27 + x23); + fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24); + fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26); + fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28); + fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20); + fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21); + fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3])); + fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2])); + fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1])); + fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0])); + fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43); + fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41); + fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39); + x53 = (x52 + x40); + fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45); + fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47); + fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49); + fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51); + fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53); + fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66); + x72 = (x71 + x67); + fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68); + fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70); + fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72); + fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64); + fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65); + x83 = ((uint64_t)x82 + x63); + fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3])); + fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2])); + fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1])); + 
fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0])); + fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88); + fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86); + fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84); + x98 = (x97 + x85); + fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90); + fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92); + fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94); + fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96); + fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98); + fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111); + x117 = (x116 + x112); + fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113); + fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115); + fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117); + fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109); + fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110); + x128 = ((uint64_t)x127 + x108); + fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3])); + fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2])); + fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1])); + fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0])); + fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133); + fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131); + fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129); + x143 = (x142 + x130); + fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135); + fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137); + fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139); + fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141); + fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143); + fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x158, &x159, 
x144, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156); + x162 = (x161 + x157); + fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158); + fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160); + fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162); + fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154); + fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155); + x173 = ((uint64_t)x172 + x153); + fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0); + fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0); + fiat_p256_cmovznz_u64(&x184, x183, x174, x165); + fiat_p256_cmovznz_u64(&x185, x183, x176, x167); + fiat_p256_cmovznz_u64(&x186, x183, x178, x169); fiat_p256_cmovznz_u64(&x187, x183, x180, x171); out1[0] = x184; out1[1] = x185; @@ -411,292 +463,304 @@ static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64 /* * The function fiat_p256_square squares a field element in the Montgomery domain. 
+ * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) { - uint64_t x1 = (arg1[1]); - uint64_t x2 = (arg1[2]); - uint64_t x3 = (arg1[3]); - uint64_t x4 = (arg1[0]); +static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) { + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; uint64_t x5; uint64_t x6; - fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3])); uint64_t x7; uint64_t x8; - fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2])); uint64_t x9; uint64_t x10; - fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1])); uint64_t x11; uint64_t x12; - fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0])); uint64_t x13; fiat_p256_uint1 x14; - fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); uint64_t x15; fiat_p256_uint1 x16; - fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7); uint64_t x17; fiat_p256_uint1 x18; - fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5); - uint64_t x19 = (x18 + x6); + uint64_t x19; uint64_t x20; uint64_t x21; - fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001)); uint64_t x22; uint64_t x23; - fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff)); uint64_t x24; uint64_t x25; - fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff)); uint64_t x26; fiat_p256_uint1 x27; - fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22); - uint64_t x28 = (x27 + x23); + uint64_t x28; uint64_t x29; fiat_p256_uint1 x30; - fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24); uint64_t 
x31; fiat_p256_uint1 x32; - fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26); uint64_t x33; fiat_p256_uint1 x34; - fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28); uint64_t x35; fiat_p256_uint1 x36; - fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20); uint64_t x37; fiat_p256_uint1 x38; - fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21); uint64_t x39; uint64_t x40; - fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3])); uint64_t x41; uint64_t x42; - fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2])); uint64_t x43; uint64_t x44; - fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1])); uint64_t x45; uint64_t x46; - fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0])); uint64_t x47; fiat_p256_uint1 x48; - fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43); uint64_t x49; fiat_p256_uint1 x50; - fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41); uint64_t x51; fiat_p256_uint1 x52; - fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39); - uint64_t x53 = (x52 + x40); + uint64_t x53; uint64_t x54; fiat_p256_uint1 x55; - fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45); uint64_t x56; fiat_p256_uint1 x57; - fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47); uint64_t x58; fiat_p256_uint1 x59; - fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49); uint64_t x60; fiat_p256_uint1 x61; - fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51); uint64_t x62; fiat_p256_uint1 x63; - fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53); uint64_t x64; uint64_t x65; - fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001)); uint64_t x66; uint64_t x67; - fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff)); uint64_t x68; uint64_t x69; - fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff)); uint64_t x70; fiat_p256_uint1 x71; - fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66); - uint64_t x72 = (x71 + x67); + uint64_t x72; uint64_t x73; fiat_p256_uint1 x74; - fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68); uint64_t x75; fiat_p256_uint1 x76; - fiat_p256_addcarryx_u64(&x75, &x76, x74, 
x56, x70); uint64_t x77; fiat_p256_uint1 x78; - fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72); uint64_t x79; fiat_p256_uint1 x80; - fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64); uint64_t x81; fiat_p256_uint1 x82; - fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65); - uint64_t x83 = ((uint64_t)x82 + x63); + uint64_t x83; uint64_t x84; uint64_t x85; - fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3])); uint64_t x86; uint64_t x87; - fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2])); uint64_t x88; uint64_t x89; - fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1])); uint64_t x90; uint64_t x91; - fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0])); uint64_t x92; fiat_p256_uint1 x93; - fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88); uint64_t x94; fiat_p256_uint1 x95; - fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86); uint64_t x96; fiat_p256_uint1 x97; - fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84); - uint64_t x98 = (x97 + x85); + uint64_t x98; uint64_t x99; fiat_p256_uint1 x100; - fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90); uint64_t x101; fiat_p256_uint1 x102; - fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92); uint64_t x103; fiat_p256_uint1 x104; - fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94); uint64_t x105; fiat_p256_uint1 x106; - fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96); uint64_t x107; fiat_p256_uint1 x108; - fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98); uint64_t x109; uint64_t x110; - fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001)); uint64_t x111; uint64_t x112; - fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff)); uint64_t x113; uint64_t x114; - fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff)); uint64_t x115; fiat_p256_uint1 x116; - fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111); - uint64_t x117 = (x116 + x112); + uint64_t x117; uint64_t x118; fiat_p256_uint1 x119; - fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113); uint64_t x120; fiat_p256_uint1 x121; - 
fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115); uint64_t x122; fiat_p256_uint1 x123; - fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117); uint64_t x124; fiat_p256_uint1 x125; - fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109); uint64_t x126; fiat_p256_uint1 x127; - fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110); - uint64_t x128 = ((uint64_t)x127 + x108); + uint64_t x128; uint64_t x129; uint64_t x130; - fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3])); uint64_t x131; uint64_t x132; - fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2])); uint64_t x133; uint64_t x134; - fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1])); uint64_t x135; uint64_t x136; - fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0])); uint64_t x137; fiat_p256_uint1 x138; - fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133); uint64_t x139; fiat_p256_uint1 x140; - fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131); uint64_t x141; fiat_p256_uint1 x142; - fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129); - uint64_t x143 = (x142 + x130); + uint64_t x143; uint64_t x144; fiat_p256_uint1 x145; - fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135); uint64_t x146; fiat_p256_uint1 x147; - fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137); uint64_t x148; fiat_p256_uint1 x149; - fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139); uint64_t x150; fiat_p256_uint1 x151; - fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141); uint64_t x152; fiat_p256_uint1 x153; - fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143); uint64_t x154; uint64_t x155; - fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001)); uint64_t x156; uint64_t x157; - fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff)); uint64_t x158; uint64_t x159; - fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff)); uint64_t x160; fiat_p256_uint1 x161; - fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156); - uint64_t x162 = (x161 + x157); + uint64_t x162; uint64_t 
x163; fiat_p256_uint1 x164; - fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158); uint64_t x165; fiat_p256_uint1 x166; - fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160); uint64_t x167; fiat_p256_uint1 x168; - fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162); uint64_t x169; fiat_p256_uint1 x170; - fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154); uint64_t x171; fiat_p256_uint1 x172; - fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155); - uint64_t x173 = ((uint64_t)x172 + x153); + uint64_t x173; uint64_t x174; fiat_p256_uint1 x175; - fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff)); uint64_t x176; fiat_p256_uint1 x177; - fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff)); uint64_t x178; fiat_p256_uint1 x179; - fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0); uint64_t x180; fiat_p256_uint1 x181; - fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001)); uint64_t x182; fiat_p256_uint1 x183; - fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0); uint64_t x184; - fiat_p256_cmovznz_u64(&x184, x183, x174, x165); uint64_t x185; - fiat_p256_cmovznz_u64(&x185, x183, x176, x167); uint64_t x186; - fiat_p256_cmovznz_u64(&x186, x183, x178, x169); uint64_t x187; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[0]); + fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3])); + fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2])); + fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1])); + fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0])); + fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); + fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7); + fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5); + x19 = (x18 + x6); + fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22); + x28 = (x27 + 
x23); + fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24); + fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26); + fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28); + fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20); + fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21); + fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3])); + fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2])); + fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1])); + fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0])); + fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43); + fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41); + fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39); + x53 = (x52 + x40); + fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45); + fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47); + fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49); + fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51); + fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53); + fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66); + x72 = (x71 + x67); + fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68); + fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70); + fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72); + fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64); + fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65); + x83 = ((uint64_t)x82 + x63); + fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3])); + fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2])); + fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1])); + fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0])); + fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88); + fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86); + fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84); + x98 = (x97 + x85); + fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90); + fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92); + 
fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94); + fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96); + fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98); + fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111); + x117 = (x116 + x112); + fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113); + fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115); + fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117); + fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109); + fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110); + x128 = ((uint64_t)x127 + x108); + fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3])); + fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2])); + fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1])); + fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0])); + fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133); + fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131); + fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129); + x143 = (x142 + x130); + fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135); + fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137); + fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139); + fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141); + fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143); + fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156); + x162 = (x161 + x157); + fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158); + fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160); + fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162); + fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, 
x154); + fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155); + x173 = ((uint64_t)x172 + x153); + fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0); + fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0); + fiat_p256_cmovznz_u64(&x184, x183, x174, x165); + fiat_p256_cmovznz_u64(&x185, x183, x176, x167); + fiat_p256_cmovznz_u64(&x186, x183, x178, x169); fiat_p256_cmovznz_u64(&x187, x183, x180, x171); out1[0] = x184; out1[1] = x185; @@ -706,6 +770,7 @@ static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) { /* * The function fiat_p256_add adds two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -713,47 +778,42 @@ static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) { * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) { +static FIAT_P256_FIAT_INLINE void fiat_p256_add(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) { uint64_t x1; fiat_p256_uint1 x2; - fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); uint64_t x3; fiat_p256_uint1 
x4; - fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); uint64_t x5; fiat_p256_uint1 x6; - fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2])); uint64_t x7; fiat_p256_uint1 x8; - fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); uint64_t x9; fiat_p256_uint1 x10; - fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff)); uint64_t x11; fiat_p256_uint1 x12; - fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff)); uint64_t x13; fiat_p256_uint1 x14; - fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0); uint64_t x15; fiat_p256_uint1 x16; - fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001)); uint64_t x17; fiat_p256_uint1 x18; - fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0); uint64_t x19; - fiat_p256_cmovznz_u64(&x19, x18, x9, x1); uint64_t x20; - fiat_p256_cmovznz_u64(&x20, x18, x11, x3); uint64_t x21; - fiat_p256_cmovznz_u64(&x21, x18, x13, x5); uint64_t x22; + fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); + fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2])); + fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0); + fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0); + fiat_p256_cmovznz_u64(&x19, x18, x9, x1); + fiat_p256_cmovznz_u64(&x20, x18, x11, x3); + fiat_p256_cmovznz_u64(&x21, x18, x13, x5); fiat_p256_cmovznz_u64(&x22, x18, x15, x7); out1[0] = x19; out1[1] = x20; @@ -763,6 +823,7 @@ static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64 /* * The function fiat_p256_sub subtracts two field elements in the Montgomery domain. 
+ * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -770,38 +831,33 @@ static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64 * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) { +static FIAT_P256_FIAT_INLINE void fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) { uint64_t x1; fiat_p256_uint1 x2; - fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); uint64_t x3; fiat_p256_uint1 x4; - fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); uint64_t x5; fiat_p256_uint1 x6; - fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2])); uint64_t x7; fiat_p256_uint1 x8; - fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); uint64_t x9; - fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff)); uint64_t x10; fiat_p256_uint1 x11; - fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff))); uint64_t x12; fiat_p256_uint1 x13; - fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff))); uint64_t x14; fiat_p256_uint1 x15; - fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0); uint64_t x16; fiat_p256_uint1 x17; + fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); + fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1])); + fiat_p256_subborrowx_u64(&x5, 
&x6, x4, (arg1[2]), (arg2[2])); + fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3])); + fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9); + fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff))); + fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0); fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001))); out1[0] = x10; out1[1] = x12; @@ -811,43 +867,40 @@ static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64 /* * The function fiat_p256_opp negates a field element in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) { +static FIAT_P256_FIAT_INLINE void fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) { uint64_t x1; fiat_p256_uint1 x2; - fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0])); uint64_t x3; fiat_p256_uint1 x4; - fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1])); uint64_t x5; fiat_p256_uint1 x6; - fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2])); uint64_t x7; fiat_p256_uint1 x8; - fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3])); uint64_t x9; - fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff)); uint64_t x10; fiat_p256_uint1 x11; - fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff))); uint64_t x12; fiat_p256_uint1 x13; - fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff))); 
uint64_t x14; fiat_p256_uint1 x15; - fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0); uint64_t x16; fiat_p256_uint1 x17; + fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0])); + fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1])); + fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2])); + fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3])); + fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9); + fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff))); + fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0); fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001))); out1[0] = x10; out1[1] = x12; @@ -857,153 +910,152 @@ static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) { /* * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) { - uint64_t x1 = (arg1[0]); +static FIAT_P256_FIAT_INLINE void fiat_p256_from_montgomery(fiat_p256_non_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) { + uint64_t x1; uint64_t x2; uint64_t x3; - fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001)); uint64_t x4; uint64_t x5; - fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff)); uint64_t x6; uint64_t x7; - fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff)); uint64_t x8; fiat_p256_uint1 x9; - fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4); 
uint64_t x10; fiat_p256_uint1 x11; - fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6); uint64_t x12; fiat_p256_uint1 x13; - fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8); uint64_t x14; fiat_p256_uint1 x15; - fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1])); uint64_t x16; uint64_t x17; - fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001)); uint64_t x18; uint64_t x19; - fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff)); uint64_t x20; uint64_t x21; - fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff)); uint64_t x22; fiat_p256_uint1 x23; - fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18); uint64_t x24; fiat_p256_uint1 x25; - fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20); uint64_t x26; fiat_p256_uint1 x27; - fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22); uint64_t x28; fiat_p256_uint1 x29; - fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19)); uint64_t x30; fiat_p256_uint1 x31; - fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16); uint64_t x32; fiat_p256_uint1 x33; - fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2])); uint64_t x34; fiat_p256_uint1 x35; - fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0); uint64_t x36; fiat_p256_uint1 x37; - fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0); uint64_t x38; uint64_t x39; - fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001)); uint64_t x40; uint64_t x41; - fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff)); uint64_t x42; uint64_t x43; - fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff)); uint64_t x44; fiat_p256_uint1 x45; - fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40); uint64_t x46; fiat_p256_uint1 x47; - fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42); uint64_t x48; fiat_p256_uint1 x49; - fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44); uint64_t x50; fiat_p256_uint1 x51; - fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41)); uint64_t x52; fiat_p256_uint1 x53; - 
fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38); uint64_t x54; fiat_p256_uint1 x55; - fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3])); uint64_t x56; fiat_p256_uint1 x57; - fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0); uint64_t x58; fiat_p256_uint1 x59; - fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0); uint64_t x60; uint64_t x61; - fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001)); uint64_t x62; uint64_t x63; - fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff)); uint64_t x64; uint64_t x65; - fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff)); uint64_t x66; fiat_p256_uint1 x67; - fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62); uint64_t x68; fiat_p256_uint1 x69; - fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64); uint64_t x70; fiat_p256_uint1 x71; - fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66); uint64_t x72; fiat_p256_uint1 x73; - fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63)); uint64_t x74; fiat_p256_uint1 x75; - fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60); - uint64_t x76 = (x75 + x61); + uint64_t x76; uint64_t x77; fiat_p256_uint1 x78; - fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff)); uint64_t x79; fiat_p256_uint1 x80; - fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff)); uint64_t x81; fiat_p256_uint1 x82; - fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0); uint64_t x83; fiat_p256_uint1 x84; - fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001)); uint64_t x85; fiat_p256_uint1 x86; - fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0); uint64_t x87; - fiat_p256_cmovznz_u64(&x87, x86, x77, x70); uint64_t x88; - fiat_p256_cmovznz_u64(&x88, x86, x79, x72); uint64_t x89; - fiat_p256_cmovznz_u64(&x89, x86, x81, x74); uint64_t x90; + x1 = (arg1[0]); + fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff)); + 
fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4); + fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6); + fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8); + fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1])); + fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18); + fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20); + fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22); + fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19)); + fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16); + fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2])); + fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0); + fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0); + fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40); + fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42); + fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44); + fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41)); + fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38); + fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3])); + fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0); + fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0); + fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62); + fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64); + fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66); + fiat_p256_addcarryx_u64(&x72, 
&x73, x71, x58, (x67 + x63)); + fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60); + x76 = (x75 + x61); + fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0); + fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0); + fiat_p256_cmovznz_u64(&x87, x86, x77, x70); + fiat_p256_cmovznz_u64(&x88, x86, x79, x72); + fiat_p256_cmovznz_u64(&x89, x86, x81, x74); fiat_p256_cmovznz_u64(&x90, x86, x83, x76); out1[0] = x87; out1[1] = x88; @@ -1012,7 +1064,284 @@ static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) } /* + * The function fiat_p256_to_montgomery translates a field element into the Montgomery domain. + * + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = eval arg1 mod m + * 0 ≤ eval out1 < m + * + */ +static FIAT_P256_FIAT_INLINE void fiat_p256_to_montgomery(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_non_montgomery_domain_field_element arg1) { + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + fiat_p256_uint1 x14; + uint64_t x15; + fiat_p256_uint1 x16; + uint64_t x17; + fiat_p256_uint1 x18; + uint64_t x19; + uint64_t x20; + uint64_t x21; + uint64_t x22; + uint64_t x23; + uint64_t x24; + uint64_t x25; + fiat_p256_uint1 x26; + uint64_t x27; + fiat_p256_uint1 x28; + uint64_t x29; + fiat_p256_uint1 x30; + uint64_t x31; + fiat_p256_uint1 x32; + uint64_t x33; + fiat_p256_uint1 x34; + uint64_t x35; + fiat_p256_uint1 x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + fiat_p256_uint1 x46; + 
uint64_t x47; + fiat_p256_uint1 x48; + uint64_t x49; + fiat_p256_uint1 x50; + uint64_t x51; + fiat_p256_uint1 x52; + uint64_t x53; + fiat_p256_uint1 x54; + uint64_t x55; + fiat_p256_uint1 x56; + uint64_t x57; + fiat_p256_uint1 x58; + uint64_t x59; + uint64_t x60; + uint64_t x61; + uint64_t x62; + uint64_t x63; + uint64_t x64; + uint64_t x65; + fiat_p256_uint1 x66; + uint64_t x67; + fiat_p256_uint1 x68; + uint64_t x69; + fiat_p256_uint1 x70; + uint64_t x71; + fiat_p256_uint1 x72; + uint64_t x73; + fiat_p256_uint1 x74; + uint64_t x75; + fiat_p256_uint1 x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + uint64_t x81; + uint64_t x82; + uint64_t x83; + uint64_t x84; + uint64_t x85; + fiat_p256_uint1 x86; + uint64_t x87; + fiat_p256_uint1 x88; + uint64_t x89; + fiat_p256_uint1 x90; + uint64_t x91; + fiat_p256_uint1 x92; + uint64_t x93; + fiat_p256_uint1 x94; + uint64_t x95; + fiat_p256_uint1 x96; + uint64_t x97; + fiat_p256_uint1 x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + uint64_t x104; + uint64_t x105; + fiat_p256_uint1 x106; + uint64_t x107; + fiat_p256_uint1 x108; + uint64_t x109; + fiat_p256_uint1 x110; + uint64_t x111; + fiat_p256_uint1 x112; + uint64_t x113; + fiat_p256_uint1 x114; + uint64_t x115; + fiat_p256_uint1 x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + uint64_t x121; + uint64_t x122; + uint64_t x123; + uint64_t x124; + uint64_t x125; + fiat_p256_uint1 x126; + uint64_t x127; + fiat_p256_uint1 x128; + uint64_t x129; + fiat_p256_uint1 x130; + uint64_t x131; + fiat_p256_uint1 x132; + uint64_t x133; + fiat_p256_uint1 x134; + uint64_t x135; + fiat_p256_uint1 x136; + uint64_t x137; + fiat_p256_uint1 x138; + uint64_t x139; + uint64_t x140; + uint64_t x141; + uint64_t x142; + uint64_t x143; + uint64_t x144; + uint64_t x145; + fiat_p256_uint1 x146; + uint64_t x147; + fiat_p256_uint1 x148; + uint64_t x149; + fiat_p256_uint1 x150; + uint64_t x151; + fiat_p256_uint1 x152; 
+ uint64_t x153; + fiat_p256_uint1 x154; + uint64_t x155; + fiat_p256_uint1 x156; + uint64_t x157; + fiat_p256_uint1 x158; + uint64_t x159; + fiat_p256_uint1 x160; + uint64_t x161; + fiat_p256_uint1 x162; + uint64_t x163; + fiat_p256_uint1 x164; + uint64_t x165; + fiat_p256_uint1 x166; + uint64_t x167; + uint64_t x168; + uint64_t x169; + uint64_t x170; + x1 = (arg1[1]); + x2 = (arg1[2]); + x3 = (arg1[3]); + x4 = (arg1[0]); + fiat_p256_mulx_u64(&x5, &x6, x4, UINT64_C(0x4fffffffd)); + fiat_p256_mulx_u64(&x7, &x8, x4, UINT64_C(0xfffffffffffffffe)); + fiat_p256_mulx_u64(&x9, &x10, x4, UINT64_C(0xfffffffbffffffff)); + fiat_p256_mulx_u64(&x11, &x12, x4, 0x3); + fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); + fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7); + fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5); + fiat_p256_mulx_u64(&x19, &x20, x11, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x21, &x22, x11, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x23, &x24, x11, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x25, &x26, 0x0, x24, x21); + fiat_p256_addcarryx_u64(&x27, &x28, 0x0, x11, x23); + fiat_p256_addcarryx_u64(&x29, &x30, x28, x13, x25); + fiat_p256_addcarryx_u64(&x31, &x32, x30, x15, (x26 + x22)); + fiat_p256_addcarryx_u64(&x33, &x34, x32, x17, x19); + fiat_p256_addcarryx_u64(&x35, &x36, x34, (x18 + x6), x20); + fiat_p256_mulx_u64(&x37, &x38, x1, UINT64_C(0x4fffffffd)); + fiat_p256_mulx_u64(&x39, &x40, x1, UINT64_C(0xfffffffffffffffe)); + fiat_p256_mulx_u64(&x41, &x42, x1, UINT64_C(0xfffffffbffffffff)); + fiat_p256_mulx_u64(&x43, &x44, x1, 0x3); + fiat_p256_addcarryx_u64(&x45, &x46, 0x0, x44, x41); + fiat_p256_addcarryx_u64(&x47, &x48, x46, x42, x39); + fiat_p256_addcarryx_u64(&x49, &x50, x48, x40, x37); + fiat_p256_addcarryx_u64(&x51, &x52, 0x0, x29, x43); + fiat_p256_addcarryx_u64(&x53, &x54, x52, x31, x45); + fiat_p256_addcarryx_u64(&x55, &x56, x54, x33, x47); + fiat_p256_addcarryx_u64(&x57, &x58, x56, x35, x49); + 
fiat_p256_mulx_u64(&x59, &x60, x51, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x61, &x62, x51, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x63, &x64, x51, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x65, &x66, 0x0, x64, x61); + fiat_p256_addcarryx_u64(&x67, &x68, 0x0, x51, x63); + fiat_p256_addcarryx_u64(&x69, &x70, x68, x53, x65); + fiat_p256_addcarryx_u64(&x71, &x72, x70, x55, (x66 + x62)); + fiat_p256_addcarryx_u64(&x73, &x74, x72, x57, x59); + fiat_p256_addcarryx_u64(&x75, &x76, x74, (((uint64_t)x58 + x36) + (x50 + x38)), x60); + fiat_p256_mulx_u64(&x77, &x78, x2, UINT64_C(0x4fffffffd)); + fiat_p256_mulx_u64(&x79, &x80, x2, UINT64_C(0xfffffffffffffffe)); + fiat_p256_mulx_u64(&x81, &x82, x2, UINT64_C(0xfffffffbffffffff)); + fiat_p256_mulx_u64(&x83, &x84, x2, 0x3); + fiat_p256_addcarryx_u64(&x85, &x86, 0x0, x84, x81); + fiat_p256_addcarryx_u64(&x87, &x88, x86, x82, x79); + fiat_p256_addcarryx_u64(&x89, &x90, x88, x80, x77); + fiat_p256_addcarryx_u64(&x91, &x92, 0x0, x69, x83); + fiat_p256_addcarryx_u64(&x93, &x94, x92, x71, x85); + fiat_p256_addcarryx_u64(&x95, &x96, x94, x73, x87); + fiat_p256_addcarryx_u64(&x97, &x98, x96, x75, x89); + fiat_p256_mulx_u64(&x99, &x100, x91, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x101, &x102, x91, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x103, &x104, x91, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x105, &x106, 0x0, x104, x101); + fiat_p256_addcarryx_u64(&x107, &x108, 0x0, x91, x103); + fiat_p256_addcarryx_u64(&x109, &x110, x108, x93, x105); + fiat_p256_addcarryx_u64(&x111, &x112, x110, x95, (x106 + x102)); + fiat_p256_addcarryx_u64(&x113, &x114, x112, x97, x99); + fiat_p256_addcarryx_u64(&x115, &x116, x114, (((uint64_t)x98 + x76) + (x90 + x78)), x100); + fiat_p256_mulx_u64(&x117, &x118, x3, UINT64_C(0x4fffffffd)); + fiat_p256_mulx_u64(&x119, &x120, x3, UINT64_C(0xfffffffffffffffe)); + fiat_p256_mulx_u64(&x121, &x122, x3, UINT64_C(0xfffffffbffffffff)); + 
fiat_p256_mulx_u64(&x123, &x124, x3, 0x3); + fiat_p256_addcarryx_u64(&x125, &x126, 0x0, x124, x121); + fiat_p256_addcarryx_u64(&x127, &x128, x126, x122, x119); + fiat_p256_addcarryx_u64(&x129, &x130, x128, x120, x117); + fiat_p256_addcarryx_u64(&x131, &x132, 0x0, x109, x123); + fiat_p256_addcarryx_u64(&x133, &x134, x132, x111, x125); + fiat_p256_addcarryx_u64(&x135, &x136, x134, x113, x127); + fiat_p256_addcarryx_u64(&x137, &x138, x136, x115, x129); + fiat_p256_mulx_u64(&x139, &x140, x131, UINT64_C(0xffffffff00000001)); + fiat_p256_mulx_u64(&x141, &x142, x131, UINT32_C(0xffffffff)); + fiat_p256_mulx_u64(&x143, &x144, x131, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x145, &x146, 0x0, x144, x141); + fiat_p256_addcarryx_u64(&x147, &x148, 0x0, x131, x143); + fiat_p256_addcarryx_u64(&x149, &x150, x148, x133, x145); + fiat_p256_addcarryx_u64(&x151, &x152, x150, x135, (x146 + x142)); + fiat_p256_addcarryx_u64(&x153, &x154, x152, x137, x139); + fiat_p256_addcarryx_u64(&x155, &x156, x154, (((uint64_t)x138 + x116) + (x130 + x118)), x140); + fiat_p256_subborrowx_u64(&x157, &x158, 0x0, x149, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x159, &x160, x158, x151, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x161, &x162, x160, x153, 0x0); + fiat_p256_subborrowx_u64(&x163, &x164, x162, x155, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x165, &x166, x164, x156, 0x0); + fiat_p256_cmovznz_u64(&x167, x166, x157, x149); + fiat_p256_cmovznz_u64(&x168, x166, x159, x151); + fiat_p256_cmovznz_u64(&x169, x166, x161, x153); + fiat_p256_cmovznz_u64(&x170, x166, x163, x155); + out1[0] = x167; + out1[1] = x168; + out1[2] = x169; + out1[3] = x170; +} + +/* * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise. 
+ * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -1023,13 +1352,15 @@ static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) * Output Bounds: * out1: [0x0 ~> 0xffffffffffffffff] */ -static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) { - uint64_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | (uint64_t)0x0)))); +static FIAT_P256_FIAT_INLINE void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) { + uint64_t x1; + x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3])))); *out1 = x1; } /* * The function fiat_p256_selectznz is a multi-limb conditional select. + * * Postconditions: * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) * @@ -1040,14 +1371,14 @@ static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) { * Output Bounds: * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) { +static FIAT_P256_FIAT_INLINE void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) { uint64_t x1; - fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0])); uint64_t x2; - fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1])); uint64_t x3; - fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2])); uint64_t x4; + fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0])); + fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1])); + fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2])); fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3])); out1[0] = x1; out1[1] = x2; @@ -1056,7 +1387,8 @@ static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const ui } /* - * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order. 
+ * The function fiat_p256_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -1067,106 +1399,164 @@ static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const ui * Output Bounds: * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]] */ -static void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) { - uint64_t x1 = (arg1[3]); - uint64_t x2 = (arg1[2]); - uint64_t x3 = (arg1[1]); - uint64_t x4 = (arg1[0]); - uint64_t x5 = (x4 >> 8); - uint8_t x6 = (uint8_t)(x4 & UINT8_C(0xff)); - uint64_t x7 = (x5 >> 8); - uint8_t x8 = (uint8_t)(x5 & UINT8_C(0xff)); - uint64_t x9 = (x7 >> 8); - uint8_t x10 = (uint8_t)(x7 & UINT8_C(0xff)); - uint64_t x11 = (x9 >> 8); - uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff)); - uint64_t x13 = (x11 >> 8); - uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff)); - uint64_t x15 = (x13 >> 8); - uint8_t x16 = (uint8_t)(x13 & UINT8_C(0xff)); - uint8_t x17 = (uint8_t)(x15 >> 8); - uint8_t x18 = (uint8_t)(x15 & UINT8_C(0xff)); - uint8_t x19 = (uint8_t)(x17 & UINT8_C(0xff)); - uint64_t x20 = (x3 >> 8); - uint8_t x21 = (uint8_t)(x3 & UINT8_C(0xff)); - uint64_t x22 = (x20 >> 8); - uint8_t x23 = (uint8_t)(x20 & UINT8_C(0xff)); - uint64_t x24 = (x22 >> 8); - uint8_t x25 = (uint8_t)(x22 & UINT8_C(0xff)); - uint64_t x26 = (x24 >> 8); - uint8_t x27 = (uint8_t)(x24 & UINT8_C(0xff)); - uint64_t x28 = (x26 >> 8); - uint8_t x29 = (uint8_t)(x26 & UINT8_C(0xff)); - uint64_t x30 = (x28 >> 8); - uint8_t x31 = 
(uint8_t)(x28 & UINT8_C(0xff)); - uint8_t x32 = (uint8_t)(x30 >> 8); - uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff)); - uint8_t x34 = (uint8_t)(x32 & UINT8_C(0xff)); - uint64_t x35 = (x2 >> 8); - uint8_t x36 = (uint8_t)(x2 & UINT8_C(0xff)); - uint64_t x37 = (x35 >> 8); - uint8_t x38 = (uint8_t)(x35 & UINT8_C(0xff)); - uint64_t x39 = (x37 >> 8); - uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff)); - uint64_t x41 = (x39 >> 8); - uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff)); - uint64_t x43 = (x41 >> 8); - uint8_t x44 = (uint8_t)(x41 & UINT8_C(0xff)); - uint64_t x45 = (x43 >> 8); - uint8_t x46 = (uint8_t)(x43 & UINT8_C(0xff)); - uint8_t x47 = (uint8_t)(x45 >> 8); - uint8_t x48 = (uint8_t)(x45 & UINT8_C(0xff)); - uint8_t x49 = (uint8_t)(x47 & UINT8_C(0xff)); - uint64_t x50 = (x1 >> 8); - uint8_t x51 = (uint8_t)(x1 & UINT8_C(0xff)); - uint64_t x52 = (x50 >> 8); - uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff)); - uint64_t x54 = (x52 >> 8); - uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff)); - uint64_t x56 = (x54 >> 8); - uint8_t x57 = (uint8_t)(x54 & UINT8_C(0xff)); - uint64_t x58 = (x56 >> 8); - uint8_t x59 = (uint8_t)(x56 & UINT8_C(0xff)); - uint64_t x60 = (x58 >> 8); - uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff)); - uint8_t x62 = (uint8_t)(x60 >> 8); - uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff)); - out1[0] = x6; - out1[1] = x8; - out1[2] = x10; - out1[3] = x12; - out1[4] = x14; - out1[5] = x16; - out1[6] = x18; - out1[7] = x19; - out1[8] = x21; - out1[9] = x23; - out1[10] = x25; - out1[11] = x27; - out1[12] = x29; - out1[13] = x31; - out1[14] = x33; - out1[15] = x34; - out1[16] = x36; - out1[17] = x38; - out1[18] = x40; - out1[19] = x42; - out1[20] = x44; - out1[21] = x46; - out1[22] = x48; - out1[23] = x49; - out1[24] = x51; - out1[25] = x53; - out1[26] = x55; - out1[27] = x57; - out1[28] = x59; - out1[29] = x61; - out1[30] = x63; - out1[31] = x62; +static FIAT_P256_FIAT_INLINE void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) { + uint64_t x1; + uint64_t x2; + 
uint64_t x3; + uint64_t x4; + uint8_t x5; + uint64_t x6; + uint8_t x7; + uint64_t x8; + uint8_t x9; + uint64_t x10; + uint8_t x11; + uint64_t x12; + uint8_t x13; + uint64_t x14; + uint8_t x15; + uint64_t x16; + uint8_t x17; + uint8_t x18; + uint8_t x19; + uint64_t x20; + uint8_t x21; + uint64_t x22; + uint8_t x23; + uint64_t x24; + uint8_t x25; + uint64_t x26; + uint8_t x27; + uint64_t x28; + uint8_t x29; + uint64_t x30; + uint8_t x31; + uint8_t x32; + uint8_t x33; + uint64_t x34; + uint8_t x35; + uint64_t x36; + uint8_t x37; + uint64_t x38; + uint8_t x39; + uint64_t x40; + uint8_t x41; + uint64_t x42; + uint8_t x43; + uint64_t x44; + uint8_t x45; + uint8_t x46; + uint8_t x47; + uint64_t x48; + uint8_t x49; + uint64_t x50; + uint8_t x51; + uint64_t x52; + uint8_t x53; + uint64_t x54; + uint8_t x55; + uint64_t x56; + uint8_t x57; + uint64_t x58; + uint8_t x59; + uint8_t x60; + x1 = (arg1[3]); + x2 = (arg1[2]); + x3 = (arg1[1]); + x4 = (arg1[0]); + x5 = (uint8_t)(x4 & UINT8_C(0xff)); + x6 = (x4 >> 8); + x7 = (uint8_t)(x6 & UINT8_C(0xff)); + x8 = (x6 >> 8); + x9 = (uint8_t)(x8 & UINT8_C(0xff)); + x10 = (x8 >> 8); + x11 = (uint8_t)(x10 & UINT8_C(0xff)); + x12 = (x10 >> 8); + x13 = (uint8_t)(x12 & UINT8_C(0xff)); + x14 = (x12 >> 8); + x15 = (uint8_t)(x14 & UINT8_C(0xff)); + x16 = (x14 >> 8); + x17 = (uint8_t)(x16 & UINT8_C(0xff)); + x18 = (uint8_t)(x16 >> 8); + x19 = (uint8_t)(x3 & UINT8_C(0xff)); + x20 = (x3 >> 8); + x21 = (uint8_t)(x20 & UINT8_C(0xff)); + x22 = (x20 >> 8); + x23 = (uint8_t)(x22 & UINT8_C(0xff)); + x24 = (x22 >> 8); + x25 = (uint8_t)(x24 & UINT8_C(0xff)); + x26 = (x24 >> 8); + x27 = (uint8_t)(x26 & UINT8_C(0xff)); + x28 = (x26 >> 8); + x29 = (uint8_t)(x28 & UINT8_C(0xff)); + x30 = (x28 >> 8); + x31 = (uint8_t)(x30 & UINT8_C(0xff)); + x32 = (uint8_t)(x30 >> 8); + x33 = (uint8_t)(x2 & UINT8_C(0xff)); + x34 = (x2 >> 8); + x35 = (uint8_t)(x34 & UINT8_C(0xff)); + x36 = (x34 >> 8); + x37 = (uint8_t)(x36 & UINT8_C(0xff)); + x38 = (x36 >> 8); + x39 = 
(uint8_t)(x38 & UINT8_C(0xff)); + x40 = (x38 >> 8); + x41 = (uint8_t)(x40 & UINT8_C(0xff)); + x42 = (x40 >> 8); + x43 = (uint8_t)(x42 & UINT8_C(0xff)); + x44 = (x42 >> 8); + x45 = (uint8_t)(x44 & UINT8_C(0xff)); + x46 = (uint8_t)(x44 >> 8); + x47 = (uint8_t)(x1 & UINT8_C(0xff)); + x48 = (x1 >> 8); + x49 = (uint8_t)(x48 & UINT8_C(0xff)); + x50 = (x48 >> 8); + x51 = (uint8_t)(x50 & UINT8_C(0xff)); + x52 = (x50 >> 8); + x53 = (uint8_t)(x52 & UINT8_C(0xff)); + x54 = (x52 >> 8); + x55 = (uint8_t)(x54 & UINT8_C(0xff)); + x56 = (x54 >> 8); + x57 = (uint8_t)(x56 & UINT8_C(0xff)); + x58 = (x56 >> 8); + x59 = (uint8_t)(x58 & UINT8_C(0xff)); + x60 = (uint8_t)(x58 >> 8); + out1[0] = x5; + out1[1] = x7; + out1[2] = x9; + out1[3] = x11; + out1[4] = x13; + out1[5] = x15; + out1[6] = x17; + out1[7] = x18; + out1[8] = x19; + out1[9] = x21; + out1[10] = x23; + out1[11] = x25; + out1[12] = x27; + out1[13] = x29; + out1[14] = x31; + out1[15] = x32; + out1[16] = x33; + out1[17] = x35; + out1[18] = x37; + out1[19] = x39; + out1[20] = x41; + out1[21] = x43; + out1[22] = x45; + out1[23] = x46; + out1[24] = x47; + out1[25] = x49; + out1[26] = x51; + out1[27] = x53; + out1[28] = x55; + out1[29] = x57; + out1[30] = x59; + out1[31] = x60; } /* - * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order. + * The function fiat_p256_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order. 
+ * * Preconditions: * 0 ≤ bytes_eval arg1 < m * Postconditions: @@ -1178,49 +1568,444 @@ static void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) { * Output Bounds: * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ -static void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) { - uint64_t x1 = ((uint64_t)(arg1[31]) << 56); - uint64_t x2 = ((uint64_t)(arg1[30]) << 48); - uint64_t x3 = ((uint64_t)(arg1[29]) << 40); - uint64_t x4 = ((uint64_t)(arg1[28]) << 32); - uint64_t x5 = ((uint64_t)(arg1[27]) << 24); - uint64_t x6 = ((uint64_t)(arg1[26]) << 16); - uint64_t x7 = ((uint64_t)(arg1[25]) << 8); - uint8_t x8 = (arg1[24]); - uint64_t x9 = ((uint64_t)(arg1[23]) << 56); - uint64_t x10 = ((uint64_t)(arg1[22]) << 48); - uint64_t x11 = ((uint64_t)(arg1[21]) << 40); - uint64_t x12 = ((uint64_t)(arg1[20]) << 32); - uint64_t x13 = ((uint64_t)(arg1[19]) << 24); - uint64_t x14 = ((uint64_t)(arg1[18]) << 16); - uint64_t x15 = ((uint64_t)(arg1[17]) << 8); - uint8_t x16 = (arg1[16]); - uint64_t x17 = ((uint64_t)(arg1[15]) << 56); - uint64_t x18 = ((uint64_t)(arg1[14]) << 48); - uint64_t x19 = ((uint64_t)(arg1[13]) << 40); - uint64_t x20 = ((uint64_t)(arg1[12]) << 32); - uint64_t x21 = ((uint64_t)(arg1[11]) << 24); - uint64_t x22 = ((uint64_t)(arg1[10]) << 16); - uint64_t x23 = ((uint64_t)(arg1[9]) << 8); - uint8_t x24 = (arg1[8]); - uint64_t x25 = ((uint64_t)(arg1[7]) << 56); - uint64_t x26 = ((uint64_t)(arg1[6]) << 48); - uint64_t x27 = ((uint64_t)(arg1[5]) << 40); - uint64_t x28 = ((uint64_t)(arg1[4]) << 32); - uint64_t x29 = ((uint64_t)(arg1[3]) << 24); - uint64_t x30 = ((uint64_t)(arg1[2]) << 16); - uint64_t x31 = ((uint64_t)(arg1[1]) << 8); - uint8_t x32 = (arg1[0]); - uint64_t x33 = (x32 + (x31 + (x30 + (x29 + (x28 + (x27 + (x26 + x25))))))); - uint64_t x34 = (x33 & UINT64_C(0xffffffffffffffff)); - uint64_t x35 = (x8 + (x7 + (x6 + (x5 + (x4 + (x3 + (x2 + x1))))))); - 
uint64_t x36 = (x16 + (x15 + (x14 + (x13 + (x12 + (x11 + (x10 + x9))))))); - uint64_t x37 = (x24 + (x23 + (x22 + (x21 + (x20 + (x19 + (x18 + x17))))))); - uint64_t x38 = (x37 & UINT64_C(0xffffffffffffffff)); - uint64_t x39 = (x36 & UINT64_C(0xffffffffffffffff)); - out1[0] = x34; - out1[1] = x38; - out1[2] = x39; - out1[3] = x35; +static FIAT_P256_FIAT_INLINE void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) { + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + uint64_t x5; + uint64_t x6; + uint64_t x7; + uint8_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint8_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; + uint64_t x21; + uint64_t x22; + uint64_t x23; + uint8_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint8_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + uint64_t x42; + uint64_t x43; + uint64_t x44; + uint64_t x45; + uint64_t x46; + uint64_t x47; + uint64_t x48; + uint64_t x49; + uint64_t x50; + uint64_t x51; + uint64_t x52; + uint64_t x53; + uint64_t x54; + uint64_t x55; + uint64_t x56; + uint64_t x57; + uint64_t x58; + uint64_t x59; + uint64_t x60; + x1 = ((uint64_t)(arg1[31]) << 56); + x2 = ((uint64_t)(arg1[30]) << 48); + x3 = ((uint64_t)(arg1[29]) << 40); + x4 = ((uint64_t)(arg1[28]) << 32); + x5 = ((uint64_t)(arg1[27]) << 24); + x6 = ((uint64_t)(arg1[26]) << 16); + x7 = ((uint64_t)(arg1[25]) << 8); + x8 = (arg1[24]); + x9 = ((uint64_t)(arg1[23]) << 56); + x10 = ((uint64_t)(arg1[22]) << 48); + x11 = ((uint64_t)(arg1[21]) << 40); + x12 = ((uint64_t)(arg1[20]) << 32); + x13 = ((uint64_t)(arg1[19]) << 24); + x14 = ((uint64_t)(arg1[18]) << 16); + x15 = ((uint64_t)(arg1[17]) << 8); + x16 = (arg1[16]); + x17 = ((uint64_t)(arg1[15]) << 56); + x18 = 
((uint64_t)(arg1[14]) << 48); + x19 = ((uint64_t)(arg1[13]) << 40); + x20 = ((uint64_t)(arg1[12]) << 32); + x21 = ((uint64_t)(arg1[11]) << 24); + x22 = ((uint64_t)(arg1[10]) << 16); + x23 = ((uint64_t)(arg1[9]) << 8); + x24 = (arg1[8]); + x25 = ((uint64_t)(arg1[7]) << 56); + x26 = ((uint64_t)(arg1[6]) << 48); + x27 = ((uint64_t)(arg1[5]) << 40); + x28 = ((uint64_t)(arg1[4]) << 32); + x29 = ((uint64_t)(arg1[3]) << 24); + x30 = ((uint64_t)(arg1[2]) << 16); + x31 = ((uint64_t)(arg1[1]) << 8); + x32 = (arg1[0]); + x33 = (x31 + (uint64_t)x32); + x34 = (x30 + x33); + x35 = (x29 + x34); + x36 = (x28 + x35); + x37 = (x27 + x36); + x38 = (x26 + x37); + x39 = (x25 + x38); + x40 = (x23 + (uint64_t)x24); + x41 = (x22 + x40); + x42 = (x21 + x41); + x43 = (x20 + x42); + x44 = (x19 + x43); + x45 = (x18 + x44); + x46 = (x17 + x45); + x47 = (x15 + (uint64_t)x16); + x48 = (x14 + x47); + x49 = (x13 + x48); + x50 = (x12 + x49); + x51 = (x11 + x50); + x52 = (x10 + x51); + x53 = (x9 + x52); + x54 = (x7 + (uint64_t)x8); + x55 = (x6 + x54); + x56 = (x5 + x55); + x57 = (x4 + x56); + x58 = (x3 + x57); + x59 = (x2 + x58); + x60 = (x1 + x59); + out1[0] = x39; + out1[1] = x46; + out1[2] = x53; + out1[3] = x60; +} + +/* + * The function fiat_p256_set_one returns the field element one in the Montgomery domain. + * + * Postconditions: + * eval (from_montgomery out1) mod m = 1 mod m + * 0 ≤ eval out1 < m + * + */ +static FIAT_P256_FIAT_INLINE void fiat_p256_set_one(fiat_p256_montgomery_domain_field_element out1) { + out1[0] = 0x1; + out1[1] = UINT64_C(0xffffffff00000000); + out1[2] = UINT64_C(0xffffffffffffffff); + out1[3] = UINT32_C(0xfffffffe); +} + +/* + * The function fiat_p256_msat returns the saturated representation of the prime modulus. 
+ * + * Postconditions: + * twos_complement_eval out1 = m + * 0 ≤ eval out1 < m + * + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +static FIAT_P256_FIAT_INLINE void fiat_p256_msat(uint64_t out1[5]) { + out1[0] = UINT64_C(0xffffffffffffffff); + out1[1] = UINT32_C(0xffffffff); + out1[2] = 0x0; + out1[3] = UINT64_C(0xffffffff00000001); + out1[4] = 0x0; +} + +/* + * The function fiat_p256_divstep computes a divstep. + * + * Preconditions: + * 0 ≤ eval arg4 < m + * 0 ≤ eval arg5 < m + * Postconditions: + * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1) + * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2) + * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋) + * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m) + * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m) + * 0 ≤ eval out4 < m + * 0 ≤ eval out5 < m + * 0 ≤ eval out2 < m + * 0 ≤ eval out3 < m + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffffffffffff] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 
0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +static FIAT_P256_FIAT_INLINE void fiat_p256_divstep(uint64_t* out1, uint64_t out2[5], uint64_t out3[5], uint64_t out4[4], uint64_t out5[4], uint64_t arg1, const uint64_t arg2[5], const uint64_t arg3[5], const uint64_t arg4[4], const uint64_t arg5[4]) { + uint64_t x1; + fiat_p256_uint1 x2; + fiat_p256_uint1 x3; + uint64_t x4; + fiat_p256_uint1 x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + fiat_p256_uint1 x13; + uint64_t x14; + fiat_p256_uint1 x15; + uint64_t x16; + fiat_p256_uint1 x17; + uint64_t x18; + fiat_p256_uint1 x19; + uint64_t x20; + fiat_p256_uint1 x21; + uint64_t x22; + uint64_t x23; + uint64_t x24; + uint64_t x25; + uint64_t x26; + uint64_t x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + fiat_p256_uint1 x32; + uint64_t x33; + fiat_p256_uint1 x34; + uint64_t x35; + fiat_p256_uint1 x36; + uint64_t x37; + fiat_p256_uint1 x38; + uint64_t x39; + fiat_p256_uint1 x40; + uint64_t x41; + fiat_p256_uint1 x42; + uint64_t x43; + fiat_p256_uint1 x44; + uint64_t 
x45; + fiat_p256_uint1 x46; + uint64_t x47; + fiat_p256_uint1 x48; + uint64_t x49; + uint64_t x50; + uint64_t x51; + uint64_t x52; + uint64_t x53; + fiat_p256_uint1 x54; + uint64_t x55; + fiat_p256_uint1 x56; + uint64_t x57; + fiat_p256_uint1 x58; + uint64_t x59; + fiat_p256_uint1 x60; + uint64_t x61; + uint64_t x62; + fiat_p256_uint1 x63; + uint64_t x64; + fiat_p256_uint1 x65; + uint64_t x66; + fiat_p256_uint1 x67; + uint64_t x68; + fiat_p256_uint1 x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + fiat_p256_uint1 x74; + uint64_t x75; + uint64_t x76; + uint64_t x77; + uint64_t x78; + uint64_t x79; + uint64_t x80; + fiat_p256_uint1 x81; + uint64_t x82; + fiat_p256_uint1 x83; + uint64_t x84; + fiat_p256_uint1 x85; + uint64_t x86; + fiat_p256_uint1 x87; + uint64_t x88; + fiat_p256_uint1 x89; + uint64_t x90; + uint64_t x91; + uint64_t x92; + uint64_t x93; + uint64_t x94; + fiat_p256_uint1 x95; + uint64_t x96; + fiat_p256_uint1 x97; + uint64_t x98; + fiat_p256_uint1 x99; + uint64_t x100; + fiat_p256_uint1 x101; + uint64_t x102; + fiat_p256_uint1 x103; + uint64_t x104; + fiat_p256_uint1 x105; + uint64_t x106; + fiat_p256_uint1 x107; + uint64_t x108; + fiat_p256_uint1 x109; + uint64_t x110; + fiat_p256_uint1 x111; + uint64_t x112; + fiat_p256_uint1 x113; + uint64_t x114; + uint64_t x115; + uint64_t x116; + uint64_t x117; + uint64_t x118; + uint64_t x119; + uint64_t x120; + uint64_t x121; + uint64_t x122; + uint64_t x123; + uint64_t x124; + uint64_t x125; + uint64_t x126; + fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (~arg1), 0x1); + x3 = (fiat_p256_uint1)((fiat_p256_uint1)(x1 >> 63) & (fiat_p256_uint1)((arg3[0]) & 0x1)); + fiat_p256_addcarryx_u64(&x4, &x5, 0x0, (~arg1), 0x1); + fiat_p256_cmovznz_u64(&x6, x3, arg1, x4); + fiat_p256_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0])); + fiat_p256_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1])); + fiat_p256_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2])); + fiat_p256_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3])); + 
fiat_p256_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4])); + fiat_p256_addcarryx_u64(&x12, &x13, 0x0, 0x1, (~(arg2[0]))); + fiat_p256_addcarryx_u64(&x14, &x15, x13, 0x0, (~(arg2[1]))); + fiat_p256_addcarryx_u64(&x16, &x17, x15, 0x0, (~(arg2[2]))); + fiat_p256_addcarryx_u64(&x18, &x19, x17, 0x0, (~(arg2[3]))); + fiat_p256_addcarryx_u64(&x20, &x21, x19, 0x0, (~(arg2[4]))); + fiat_p256_cmovznz_u64(&x22, x3, (arg3[0]), x12); + fiat_p256_cmovznz_u64(&x23, x3, (arg3[1]), x14); + fiat_p256_cmovznz_u64(&x24, x3, (arg3[2]), x16); + fiat_p256_cmovznz_u64(&x25, x3, (arg3[3]), x18); + fiat_p256_cmovznz_u64(&x26, x3, (arg3[4]), x20); + fiat_p256_cmovznz_u64(&x27, x3, (arg4[0]), (arg5[0])); + fiat_p256_cmovznz_u64(&x28, x3, (arg4[1]), (arg5[1])); + fiat_p256_cmovznz_u64(&x29, x3, (arg4[2]), (arg5[2])); + fiat_p256_cmovznz_u64(&x30, x3, (arg4[3]), (arg5[3])); + fiat_p256_addcarryx_u64(&x31, &x32, 0x0, x27, x27); + fiat_p256_addcarryx_u64(&x33, &x34, x32, x28, x28); + fiat_p256_addcarryx_u64(&x35, &x36, x34, x29, x29); + fiat_p256_addcarryx_u64(&x37, &x38, x36, x30, x30); + fiat_p256_subborrowx_u64(&x39, &x40, 0x0, x31, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x41, &x42, x40, x33, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x43, &x44, x42, x35, 0x0); + fiat_p256_subborrowx_u64(&x45, &x46, x44, x37, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x47, &x48, x46, x38, 0x0); + x49 = (arg4[3]); + x50 = (arg4[2]); + x51 = (arg4[1]); + x52 = (arg4[0]); + fiat_p256_subborrowx_u64(&x53, &x54, 0x0, 0x0, x52); + fiat_p256_subborrowx_u64(&x55, &x56, x54, 0x0, x51); + fiat_p256_subborrowx_u64(&x57, &x58, x56, 0x0, x50); + fiat_p256_subborrowx_u64(&x59, &x60, x58, 0x0, x49); + fiat_p256_cmovznz_u64(&x61, x60, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_p256_addcarryx_u64(&x62, &x63, 0x0, x53, x61); + fiat_p256_addcarryx_u64(&x64, &x65, x63, x55, (x61 & UINT32_C(0xffffffff))); + fiat_p256_addcarryx_u64(&x66, &x67, x65, x57, 0x0); + fiat_p256_addcarryx_u64(&x68, 
&x69, x67, x59, (x61 & UINT64_C(0xffffffff00000001))); + fiat_p256_cmovznz_u64(&x70, x3, (arg5[0]), x62); + fiat_p256_cmovznz_u64(&x71, x3, (arg5[1]), x64); + fiat_p256_cmovznz_u64(&x72, x3, (arg5[2]), x66); + fiat_p256_cmovznz_u64(&x73, x3, (arg5[3]), x68); + x74 = (fiat_p256_uint1)(x22 & 0x1); + fiat_p256_cmovznz_u64(&x75, x74, 0x0, x7); + fiat_p256_cmovznz_u64(&x76, x74, 0x0, x8); + fiat_p256_cmovznz_u64(&x77, x74, 0x0, x9); + fiat_p256_cmovznz_u64(&x78, x74, 0x0, x10); + fiat_p256_cmovznz_u64(&x79, x74, 0x0, x11); + fiat_p256_addcarryx_u64(&x80, &x81, 0x0, x22, x75); + fiat_p256_addcarryx_u64(&x82, &x83, x81, x23, x76); + fiat_p256_addcarryx_u64(&x84, &x85, x83, x24, x77); + fiat_p256_addcarryx_u64(&x86, &x87, x85, x25, x78); + fiat_p256_addcarryx_u64(&x88, &x89, x87, x26, x79); + fiat_p256_cmovznz_u64(&x90, x74, 0x0, x27); + fiat_p256_cmovznz_u64(&x91, x74, 0x0, x28); + fiat_p256_cmovznz_u64(&x92, x74, 0x0, x29); + fiat_p256_cmovznz_u64(&x93, x74, 0x0, x30); + fiat_p256_addcarryx_u64(&x94, &x95, 0x0, x70, x90); + fiat_p256_addcarryx_u64(&x96, &x97, x95, x71, x91); + fiat_p256_addcarryx_u64(&x98, &x99, x97, x72, x92); + fiat_p256_addcarryx_u64(&x100, &x101, x99, x73, x93); + fiat_p256_subborrowx_u64(&x102, &x103, 0x0, x94, UINT64_C(0xffffffffffffffff)); + fiat_p256_subborrowx_u64(&x104, &x105, x103, x96, UINT32_C(0xffffffff)); + fiat_p256_subborrowx_u64(&x106, &x107, x105, x98, 0x0); + fiat_p256_subborrowx_u64(&x108, &x109, x107, x100, UINT64_C(0xffffffff00000001)); + fiat_p256_subborrowx_u64(&x110, &x111, x109, x101, 0x0); + fiat_p256_addcarryx_u64(&x112, &x113, 0x0, x6, 0x1); + x114 = ((x80 >> 1) | ((x82 << 63) & UINT64_C(0xffffffffffffffff))); + x115 = ((x82 >> 1) | ((x84 << 63) & UINT64_C(0xffffffffffffffff))); + x116 = ((x84 >> 1) | ((x86 << 63) & UINT64_C(0xffffffffffffffff))); + x117 = ((x86 >> 1) | ((x88 << 63) & UINT64_C(0xffffffffffffffff))); + x118 = ((x88 & UINT64_C(0x8000000000000000)) | (x88 >> 1)); + fiat_p256_cmovznz_u64(&x119, x48, x39, x31); + 
fiat_p256_cmovznz_u64(&x120, x48, x41, x33); + fiat_p256_cmovznz_u64(&x121, x48, x43, x35); + fiat_p256_cmovznz_u64(&x122, x48, x45, x37); + fiat_p256_cmovznz_u64(&x123, x111, x102, x94); + fiat_p256_cmovznz_u64(&x124, x111, x104, x96); + fiat_p256_cmovznz_u64(&x125, x111, x106, x98); + fiat_p256_cmovznz_u64(&x126, x111, x108, x100); + *out1 = x112; + out2[0] = x7; + out2[1] = x8; + out2[2] = x9; + out2[3] = x10; + out2[4] = x11; + out3[0] = x114; + out3[1] = x115; + out3[2] = x116; + out3[3] = x117; + out3[4] = x118; + out4[0] = x119; + out4[1] = x120; + out4[2] = x121; + out4[3] = x122; + out5[0] = x123; + out5[1] = x124; + out5[2] = x125; + out5[3] = x126; } +/* + * The function fiat_p256_divstep_precomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form). + * + * Postconditions: + * eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋) + * 0 ≤ eval out1 < m + * + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +static FIAT_P256_FIAT_INLINE void fiat_p256_divstep_precomp(uint64_t out1[4]) { + out1[0] = UINT64_C(0x67ffffffb8000000); + out1[1] = UINT64_C(0xc000000038000000); + out1[2] = UINT64_C(0xd80000007fffffff); + out1[3] = UINT64_C(0x2fffffffffffffff); +} |