10 files changed, 275 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 1788502..48d0544 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -30,6 +30,8 @@ float __s_expf (float);
 float __s_expf_1u (float);
 float __s_logf (float);
 float __s_powf (float, float);
+double __s_sin (double);
+double __s_cos (double);
 double __s_exp (double);
 
 #if __aarch64__
@@ -50,6 +52,8 @@ __f32x4_t __v_expf (__f32x4_t);
 __f32x4_t __v_expf_1u (__f32x4_t);
 __f32x4_t __v_logf (__f32x4_t);
 __f32x4_t __v_powf (__f32x4_t, __f32x4_t);
+__f64x2_t __v_sin (__f64x2_t);
+__f64x2_t __v_cos (__f64x2_t);
 __f64x2_t __v_exp (__f64x2_t);
 
 #if __GNUC__ >= 9 || __clang_major__ >= 8
@@ -62,6 +66,8 @@ __vpcs __f32x4_t __vn_expf (__f32x4_t);
 __vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
 __vpcs __f32x4_t __vn_logf (__f32x4_t);
 __vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t __vn_sin (__f64x2_t);
+__vpcs __f64x2_t __vn_cos (__f64x2_t);
 __vpcs __f64x2_t __vn_exp (__f64x2_t);
 
 /* Vector functions following the vector PCS using ABI names.  */
@@ -70,6 +76,8 @@ __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
 #endif
 #endif
diff --git a/math/s_cos.c b/math/s_cos.c
new file mode 100644
index 0000000..53a95b0
--- /dev/null
+++ b/math/s_cos.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_cos.c"
diff --git a/math/s_sin.c b/math/s_sin.c
new file mode 100644
index 0000000..06982c2
--- /dev/null
+++ b/math/s_sin.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_sin.c"
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 84e69f5..7831971 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -207,6 +207,8 @@ static const struct fun
 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
 #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
 D (dummy, 1.0, 2.0)
+D (__s_sin, -3.1, 3.1)
+D (__s_cos, -3.1, 3.1)
 D (exp, -9.9, 9.9)
 D (exp, 0.5, 1.0)
 D (__s_exp, -9.9, 9.9)
@@ -253,6 +255,8 @@ F (cosf, 1e6, 1e32)
 F (__s_cosf, -3.1, 3.1)
 #if __aarch64__
 VD (__v_dummy, 1.0, 2.0)
+VD (__v_sin, -3.1, 3.1)
+VD (__v_cos, -3.1, 3.1)
 VD (__v_exp, -9.9, 9.9)
 VF (__v_dummyf, 1.0, 2.0)
 VF (__v_expf, -9.9, 9.9)
@@ -265,6 +269,10 @@ VF (__v_cosf, -3.1, 3.1)
 VND (__vn_dummy, 1.0, 2.0)
 VND (__vn_exp, -9.9, 9.9)
 VND (_ZGVnN2v_exp, -9.9, 9.9)
+VND (__vn_sin, -3.1, 3.1)
+VND (_ZGVnN2v_sin, -3.1, 3.1)
+VND (__vn_cos, -3.1, 3.1)
+VND (_ZGVnN2v_cos, -3.1, 3.1)
 VNF (__vn_dummyf, 1.0, 2.0)
 VNF (__vn_expf, -9.9, 9.9)
 VNF (_ZGVnN4v_expf, -9.9, 9.9)
diff --git a/math/test/ulp.c b/math/test/ulp.c
index 514a245..6a3ed12 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -229,6 +229,8 @@ static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; }
 static float v_expf(float x) { return __v_expf(argf(x))[0]; }
 static float v_logf(float x) { return __v_logf(argf(x))[0]; }
 static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; }
+static double v_sin(double x) { return __v_sin(argd(x))[0]; }
+static double v_cos(double x) { return __v_cos(argd(x))[0]; }
 static double v_exp(double x) { return __v_exp(argd(x))[0]; }
 #ifdef __vpcs
 static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
@@ -237,12 +239,16 @@ static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; }
 static float vn_expf(float x) { return __vn_expf(argf(x))[0]; }
 static float vn_logf(float x) { return __vn_logf(argf(x))[0]; }
 static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; }
+static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
+static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
 static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
 static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
 static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
 static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
 static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; }
 static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; }
+static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
+static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
 static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
 #endif
 #endif
@@ -308,6 +314,8 @@ static const struct fun fun[] = {
  F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
  F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
  F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
+ F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+ F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
  F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
 #if __aarch64__
  F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
@@ -316,6 +324,8 @@ static const struct fun fun[] = {
  F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
  F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
  F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
 #ifdef __vpcs
  F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
@@ -324,12 +334,16 @@ static const struct fun fun[] = {
  F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
  F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
  F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
  F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
  F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
  F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
 #endif
 #endif
diff --git a/math/tools/v_sin.sollya b/math/tools/v_sin.sollya
new file mode 100644
index 0000000..65cc995
--- /dev/null
+++ b/math/tools/v_sin.sollya
@@ -0,0 +1,36 @@
+// polynomial for approximating sin(x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 15;  // polynomial degree
+a = -pi/2; // interval
+b = pi/2;
+
+// find even polynomial with minimal abs error compared to sin(x)/x
+
+// account for /x
+deg = deg-1;
+
+// f = sin(x)/x;
+f = 1;
+c = 1;
+for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+  return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg/2 do {
+  p = roundcoefficients(approx(poly,2*i), [|D ...|]);
+  poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal;
+print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/math/v_cos.c b/math/v_cos.c
new file mode 100644
index 0000000..20ba6bd
--- /dev/null
+++ b/math/v_cos.c
@@ -0,0 +1,87 @@
+/*
+ * Double-precision vector cos function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+static const double Poly[] = {
+/* worst-case error is 3.5 ulp.
+   abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].  */
+-0x1.9f4a9c8b21dc9p-41,
+ 0x1.60e88a10163f2p-33,
+-0x1.ae6361b7254e7p-26,
+ 0x1.71de382e8d62bp-19,
+-0x1.a01a019aeb4ffp-13,
+ 0x1.111111110b25ep-7,
+-0x1.55555555554c3p-3,
+};
+
+#define C7 v_f64 (Poly[0])
+#define C6 v_f64 (Poly[1])
+#define C5 v_f64 (Poly[2])
+#define C4 v_f64 (Poly[3])
+#define C3 v_f64 (Poly[4])
+#define C2 v_f64 (Poly[5])
+#define C1 v_f64 (Poly[6])
+
+#define InvPi v_f64 (0x1.45f306dc9c883p-2)
+#define HalfPi v_f64 (0x1.921fb54442d18p+0)
+#define Pi1 v_f64 (0x1.921fb54442d18p+1)
+#define Pi2 v_f64 (0x1.1a62633145c06p-53)
+#define Pi3 v_f64 (0x1.c1cd129024e09p-106)
+#define Shift v_f64 (0x1.8p52)
+#define RangeVal v_f64 (0x1p23)
+#define AbsMask v_u64 (0x7fffffffffffffff)
+
+VPCS_ATTR
+__attribute__ ((noinline)) static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+  return v_call_f64 (cos, x, y, cmp);
+}
+
+VPCS_ATTR
+v_f64_t
+V_NAME(cos) (v_f64_t x)
+{
+  v_f64_t n, r, r2, y;
+  v_u64_t odd, cmp;
+
+  r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
+  cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
+
+  /* n = rint((|x|+pi/2)/pi) - 0.5.  */
+  n = v_fma_f64 (InvPi, r + HalfPi, Shift);
+  odd = v_as_u64_f64 (n) << 63;
+  n -= Shift;
+  n -= v_f64 (0.5);
+
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
+  r = v_fma_f64 (-Pi1, n, r);
+  r = v_fma_f64 (-Pi2, n, r);
+  r = v_fma_f64 (-Pi3, n, r);
+
+  /* sin(r) poly approx.  */
+  r2 = r * r;
+  y = v_fma_f64 (C7, r2, C6);
+  y = v_fma_f64 (y, r2, C5);
+  y = v_fma_f64 (y, r2, C4);
+  y = v_fma_f64 (y, r2, C3);
+  y = v_fma_f64 (y, r2, C2);
+  y = v_fma_f64 (y, r2, C1);
+  y = v_fma_f64 (y * r2, r, r);
+
+  /* sign.  */
+  y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd);
+
+  if (unlikely (v_any_u64 (cmp)))
+    return specialcase (x, y, cmp);
+  return y;
+}
+VPCS_ALIAS
+#endif
diff --git a/math/v_sin.c b/math/v_sin.c
new file mode 100644
index 0000000..2b9ed05
--- /dev/null
+++ b/math/v_sin.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision vector sin function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+static const double Poly[] = {
+/* worst-case error is 3.5 ulp.
+   abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].  */
+-0x1.9f4a9c8b21dc9p-41,
+ 0x1.60e88a10163f2p-33,
+-0x1.ae6361b7254e7p-26,
+ 0x1.71de382e8d62bp-19,
+-0x1.a01a019aeb4ffp-13,
+ 0x1.111111110b25ep-7,
+-0x1.55555555554c3p-3,
+};
+
+#define C7 v_f64 (Poly[0])
+#define C6 v_f64 (Poly[1])
+#define C5 v_f64 (Poly[2])
+#define C4 v_f64 (Poly[3])
+#define C3 v_f64 (Poly[4])
+#define C2 v_f64 (Poly[5])
+#define C1 v_f64 (Poly[6])
+
+#define InvPi v_f64 (0x1.45f306dc9c883p-2)
+#define Pi1 v_f64 (0x1.921fb54442d18p+1)
+#define Pi2 v_f64 (0x1.1a62633145c06p-53)
+#define Pi3 v_f64 (0x1.c1cd129024e09p-106)
+#define Shift v_f64 (0x1.8p52)
+#define RangeVal v_f64 (0x1p23)
+#define AbsMask v_u64 (0x7fffffffffffffff)
+
+VPCS_ATTR
+__attribute__ ((noinline)) static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+  return v_call_f64 (sin, x, y, cmp);
+}
+
+VPCS_ATTR
+v_f64_t
+V_NAME(sin) (v_f64_t x)
+{
+  v_f64_t n, r, r2, y;
+  v_u64_t sign, odd, cmp;
+
+  r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
+  sign = v_as_u64_f64 (x) & ~AbsMask;
+  cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
+
+  /* n = rint(|x|/pi).  */
+  n = v_fma_f64 (InvPi, r, Shift);
+  odd = v_as_u64_f64 (n) << 63;
+  n -= Shift;
+
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
+  r = v_fma_f64 (-Pi1, n, r);
+  r = v_fma_f64 (-Pi2, n, r);
+  r = v_fma_f64 (-Pi3, n, r);
+
+  /* sin(r) poly approx.  */
+  r2 = r * r;
+  y = v_fma_f64 (C7, r2, C6);
+  y = v_fma_f64 (y, r2, C5);
+  y = v_fma_f64 (y, r2, C4);
+  y = v_fma_f64 (y, r2, C3);
+  y = v_fma_f64 (y, r2, C2);
+  y = v_fma_f64 (y, r2, C1);
+  y = v_fma_f64 (y * r2, r, r);
+
+  /* sign.  */
+  y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign ^ odd);
+
+  if (unlikely (v_any_u64 (cmp)))
+    return specialcase (x, y, cmp);
+  return y;
+}
+VPCS_ALIAS
+#endif
diff --git a/math/vn_cos.c b/math/vn_cos.c
new file mode 100644
index 0000000..b57a549
--- /dev/null
+++ b/math/vn_cos.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_cos.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_cos, _ZGVnN2v_cos)
+#include "v_cos.c"
+#endif
diff --git a/math/vn_sin.c b/math/vn_sin.c
new file mode 100644
index 0000000..905c796
--- /dev/null
+++ b/math/vn_sin.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_sin.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_sin, _ZGVnN2v_sin)
+#include "v_sin.c"
+#endif