summary | refs | log | tree | commit | diff
path: root/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/fipsmodule/bn/asm/x86_64-gcc.c')
-rw-r--r-- src/crypto/fipsmodule/bn/asm/x86_64-gcc.c 141
1 files changed, 68 insertions, 73 deletions
diff --git a/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c b/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c
index 72e7689c..bfd770f5 100644
--- a/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c
+++ b/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c
@@ -52,7 +52,7 @@
#include <openssl/bn.h>
-/* TODO(davidben): Get this file working on Windows x64. */
+// TODO(davidben): Get this file working on Windows x64.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__GNUC__)
#include "../internal.h"
@@ -61,41 +61,37 @@
#undef mul
#undef mul_add
-#define asm __asm__
-
-/*
- * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
- * "g"(0) let the compiler to decide where does it
- * want to keep the value of zero;
- */
-#define mul_add(r, a, word, carry) \
- do { \
- register BN_ULONG high, low; \
- asm("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "m"(a) : "cc"); \
- asm("addq %2,%0; adcq %3,%1" \
- : "+r"(carry), "+d"(high) \
- : "a"(low), "g"(0) \
- : "cc"); \
- asm("addq %2,%0; adcq %3,%1" \
- : "+m"(r), "+d"(high) \
- : "r"(carry), "g"(0) \
- : "cc"); \
- (carry) = high; \
+// "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
+// "g"(0) lets the compiler decide where it
+// wants to keep the value of zero.
+#define mul_add(r, a, word, carry) \
+ do { \
+ register BN_ULONG high, low; \
+ __asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "m"(a) : "cc"); \
+ __asm__("addq %2,%0; adcq %3,%1" \
+ : "+r"(carry), "+d"(high) \
+ : "a"(low), "g"(0) \
+ : "cc"); \
+ __asm__("addq %2,%0; adcq %3,%1" \
+ : "+m"(r), "+d"(high) \
+ : "r"(carry), "g"(0) \
+ : "cc"); \
+ (carry) = high; \
} while (0)
-#define mul(r, a, word, carry) \
- do { \
- register BN_ULONG high, low; \
- asm("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "g"(a) : "cc"); \
- asm("addq %2,%0; adcq %3,%1" \
- : "+r"(carry), "+d"(high) \
- : "a"(low), "g"(0) \
- : "cc"); \
- (r) = (carry); \
- (carry) = high; \
+#define mul(r, a, word, carry) \
+ do { \
+ register BN_ULONG high, low; \
+ __asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "g"(a) : "cc"); \
+ __asm__("addq %2,%0; adcq %3,%1" \
+ : "+r"(carry), "+d"(high) \
+ : "a"(low), "g"(0) \
+ : "cc"); \
+ (r) = (carry); \
+ (carry) = high; \
} while (0)
#undef sqr
-#define sqr(r0, r1, a) asm("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc");
+#define sqr(r0, r1, a) __asm__("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc");
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
BN_ULONG w) {
@@ -196,8 +192,8 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
return 0;
}
- asm volatile (
- " subq %0,%0 \n" /* clear carry */
+ __asm__ volatile (
+ " subq %0,%0 \n" // clear carry
" jmp 1f \n"
".p2align 4 \n"
"1:"
@@ -223,8 +219,8 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
return 0;
}
- asm volatile (
- " subq %0,%0 \n" /* clear borrow */
+ __asm__ volatile (
+ " subq %0,%0 \n" // clear borrow
" jmp 1f \n"
".p2align 4 \n"
"1:"
@@ -241,46 +237,45 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
return ret & 1;
}
-/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
-/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
-/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
-/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
- */
-
-/* Keep in mind that carrying into high part of multiplication result can not
- * overflow, because it cannot be all-ones. */
-#define mul_add_c(a, b, c0, c1, c2) \
- do { \
- BN_ULONG t1, t2; \
- asm("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
- asm("addq %3,%0; adcq %4,%1; adcq %5,%2" \
- : "+r"(c0), "+r"(c1), "+r"(c2) \
- : "r"(t1), "r"(t2), "g"(0) \
- : "cc"); \
+// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
+// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
+// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
+// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
+
+// Keep in mind that carrying into the high part of the multiplication result
+// cannot overflow, because the high part cannot be all-ones.
+#define mul_add_c(a, b, c0, c1, c2) \
+ do { \
+ BN_ULONG t1, t2; \
+ __asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
+ __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0), "+r"(c1), "+r"(c2) \
+ : "r"(t1), "r"(t2), "g"(0) \
+ : "cc"); \
} while (0)
-#define sqr_add_c(a, i, c0, c1, c2) \
- do { \
- BN_ULONG t1, t2; \
- asm("mulq %2" : "=a"(t1), "=d"(t2) : "a"((a)[i]) : "cc"); \
- asm("addq %3,%0; adcq %4,%1; adcq %5,%2" \
- : "+r"(c0), "+r"(c1), "+r"(c2) \
- : "r"(t1), "r"(t2), "g"(0) \
- : "cc"); \
+#define sqr_add_c(a, i, c0, c1, c2) \
+ do { \
+ BN_ULONG t1, t2; \
+ __asm__("mulq %2" : "=a"(t1), "=d"(t2) : "a"((a)[i]) : "cc"); \
+ __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0), "+r"(c1), "+r"(c2) \
+ : "r"(t1), "r"(t2), "g"(0) \
+ : "cc"); \
} while (0)
-#define mul_add_c2(a, b, c0, c1, c2) \
- do { \
- BN_ULONG t1, t2; \
- asm("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
- asm("addq %3,%0; adcq %4,%1; adcq %5,%2" \
- : "+r"(c0), "+r"(c1), "+r"(c2) \
- : "r"(t1), "r"(t2), "g"(0) \
- : "cc"); \
- asm("addq %3,%0; adcq %4,%1; adcq %5,%2" \
- : "+r"(c0), "+r"(c1), "+r"(c2) \
- : "r"(t1), "r"(t2), "g"(0) \
- : "cc"); \
+#define mul_add_c2(a, b, c0, c1, c2) \
+ do { \
+ BN_ULONG t1, t2; \
+ __asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
+ __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0), "+r"(c1), "+r"(c2) \
+ : "r"(t1), "r"(t2), "g"(0) \
+ : "cc"); \
+ __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0), "+r"(c1), "+r"(c2) \
+ : "r"(t1), "r"(t2), "g"(0) \
+ : "cc"); \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
@@ -539,4 +534,4 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
#undef mul_add_c2
#undef sqr_add_c2
-#endif /* !NO_ASM && X86_64 && __GNUC__ */
+#endif // !NO_ASM && X86_64 && __GNUC__