summaryrefslogtreecommitdiff
path: root/src/third_party/fiat/asm/fiat_p256_adx_sqr.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/fiat/asm/fiat_p256_adx_sqr.S')
-rw-r--r--src/third_party/fiat/asm/fiat_p256_adx_sqr.S167
1 files changed, 167 insertions, 0 deletions
diff --git a/src/third_party/fiat/asm/fiat_p256_adx_sqr.S b/src/third_party/fiat/asm/fiat_p256_adx_sqr.S
new file mode 100644
index 00000000..cca269f5
--- /dev/null
+++ b/src/third_party/fiat/asm/fiat_p256_adx_sqr.S
@@ -0,0 +1,167 @@
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+ (defined(__APPLE__) || defined(__ELF__))
+
+.intel_syntax noprefix
+.text
+#if defined(__APPLE__)
+.private_extern _fiat_p256_adx_sqr
+.global _fiat_p256_adx_sqr
+_fiat_p256_adx_sqr:
+#else
+.type fiat_p256_adx_sqr, @function
+.hidden fiat_p256_adx_sqr
+.global fiat_p256_adx_sqr
+fiat_p256_adx_sqr:
+#endif
+
+.cfi_startproc
+_CET_ENDBR
+push rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbp, -16
+mov rbp, rsp
+mov rdx, [ rsi + 0x0 ]
+mulx r10, rax, [ rsi + 0x18 ]
+mulx rcx, r11, rdx
+mulx r9, r8, [ rsi + 0x8 ]
+mov [ rsp - 0x80 ], rbx
+.cfi_offset rbx, -16-0x80
+xor rbx, rbx
+adox r8, r8
+mov [ rsp - 0x78 ], r12
+.cfi_offset r12, -16-0x78
+mulx r12, rbx, [ rsi + 0x10 ]
+mov rdx, [ rsi + 0x8 ]
+mov [ rsp - 0x70 ], r13
+.cfi_offset r13, -16-0x70
+mov [ rsp - 0x68 ], r14
+.cfi_offset r14, -16-0x68
+mulx r14, r13, rdx
+mov [ rsp - 0x60 ], r15
+.cfi_offset r15, -16-0x60
+mov [ rsp - 0x58 ], rdi
+mulx rdi, r15, [ rsi + 0x10 ]
+adcx r12, r15
+mov [ rsp - 0x50 ], r11
+mulx r11, r15, [ rsi + 0x18 ]
+adcx r10, rdi
+mov rdi, 0x0
+adcx r11, rdi
+clc
+adcx rbx, r9
+adox rbx, rbx
+adcx rax, r12
+adox rax, rax
+adcx r15, r10
+adox r15, r15
+mov rdx, [ rsi + 0x10 ]
+mulx r12, r9, [ rsi + 0x18 ]
+adcx r9, r11
+adcx r12, rdi
+mulx r11, r10, rdx
+clc
+adcx rcx, r8
+adcx r13, rbx
+adcx r14, rax
+adox r9, r9
+adcx r10, r15
+mov rdx, [ rsi + 0x18 ]
+mulx rbx, r8, rdx
+adox r12, r12
+adcx r11, r9
+mov rsi, [ rsp - 0x50 ]
+adcx r8, r12
+mov rax, 0x100000000
+mov rdx, rax
+mulx r15, rax, rsi
+adcx rbx, rdi
+adox rbx, rdi
+xor r9, r9
+adox rax, rcx
+adox r15, r13
+mulx rcx, rdi, rax
+adcx rdi, r15
+adox rcx, r14
+mov rdx, 0xffffffff00000001
+mulx r14, r13, rsi
+adox r14, r10
+adcx r13, rcx
+mulx r12, r10, rax
+adox r12, r11
+mov r11, r9
+adox r11, r8
+adcx r10, r14
+mov r8, r9
+adcx r8, r12
+mov rax, r9
+adcx rax, r11
+mov r15, r9
+adox r15, rbx
+mov rdx, 0x100000000
+mulx rcx, rbx, rdi
+mov r14, r9
+adcx r14, r15
+mov r12, r9
+adox r12, r12
+adcx r12, r9
+adox rbx, r13
+mulx r11, r13, rbx
+mov r15, 0xffffffff00000001
+mov rdx, r15
+mulx rsi, r15, rbx
+adox rcx, r10
+adox r11, r8
+mulx r8, r10, rdi
+adcx r13, rcx
+adox r8, rax
+adcx r10, r11
+adox rsi, r14
+mov rdi, r12
+mov rax, r9
+adox rdi, rax
+adcx r15, r8
+mov r14, rax
+adcx r14, rsi
+adcx rdi, r9
+dec r9
+mov rbx, r13
+sub rbx, r9
+mov rcx, 0xffffffff
+mov r11, r10
+sbb r11, rcx
+mov r8, r15
+sbb r8, rax
+mov rsi, r14
+sbb rsi, rdx
+sbb rdi, rax
+cmovc rbx, r13
+cmovc r8, r15
+cmovc r11, r10
+cmovc rsi, r14
+mov rdi, [ rsp - 0x58 ]
+mov [ rdi + 0x18 ], rsi
+mov [ rdi + 0x0 ], rbx
+mov [ rdi + 0x8 ], r11
+mov [ rdi + 0x10 ], r8
+mov rbx, [ rsp - 0x80 ]
+.cfi_restore rbx
+mov r12, [ rsp - 0x78 ]
+.cfi_restore r12
+mov r13, [ rsp - 0x70 ]
+.cfi_restore r13
+mov r14, [ rsp - 0x68 ]
+.cfi_restore r14
+mov r15, [ rsp - 0x60 ]
+.cfi_restore r15
+pop rbp
+.cfi_restore rbp
+.cfi_adjust_cfa_offset -8
+ret
+.cfi_endproc
+#if defined(__ELF__)
+.size fiat_p256_adx_sqr, .-fiat_p256_adx_sqr
+#endif
+
+#endif