From 83a43cd7afcbdca7e192704121acfad6fa24cb67 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Fri, 15 Jul 2022 08:56:20 +0100
Subject: pl/math: Add vector/Neon asinhf

The new routine uses vector log1pf, and is accurate to 2.7 ulp.
---
 pl/math/v_asinhf_2u7.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 pl/math/v_asinhf_2u7.c

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
new file mode 100644
index 0000000..39f7989
--- /dev/null
+++ b/pl/math/v_asinhf_2u7.c
@@ -0,0 +1,51 @@
+/*
+ * Single-precision vector asinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+
+#if V_SUPPORTED
+
+#define SignMask v_u32 (0x80000000)
+#define One v_f32 (1.0f)
+#define Ln2 v_f32 (0x1.62e43p-1f)
+#define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64).  */
+
+static inline v_f32_t
+handle_special (v_f32_t ax)
+{
+  return V_NAME (log1pf) (ax) + Ln2;
+}
+
+/* Single-precision implementation of vector asinh(x), using vector log1p.
+   Worst-case error is 2.66 ULP, at roughly +/-0.25:
+   __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3.  */
+VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
+{
+  v_f32_t ax = v_abs_f32 (x);
+  v_u32_t special = v_cond_u32 (v_as_u32_f32 (ax) >= SpecialBound);
+  v_u32_t sign = v_as_u32_f32 (x) & SignMask;
+
+  /* asinh(x) = log(x + sqrt(x * x + 1)).
+     For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))).  */
+  v_f32_t d = One + v_sqrt_f32 (ax * ax + One);
+  v_f32_t y = V_NAME (log1pf) (ax + ax * ax / d);
+
+  if (unlikely (v_any_u32 (special)))
+    {
+      /* If |x| is too large, we cannot square it at low cost without overflow.
+	 At very large x, asinh(x) ~= log(2x) and log(x) ~= log1p(x), so we
+	 calculate asinh(x) as log1p(x) + log(2).  */
+      v_f32_t y_large = V_NAME (log1pf) (ax) + Ln2;
+      return v_as_f32_u32 (sign
+			   | v_as_u32_f32 (v_sel_f32 (special, y_large, y)));
+    }
+
+  return v_as_f32_u32 (sign | v_as_u32_f32 (y));
+}
+VPCS_ALIAS
+
+#endif
-- 
cgit v1.2.3


From 3d1a87e2fe152dc52d4a624425f5b2349a4088b0 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon, 15 Aug 2022 11:19:25 +0100
Subject: pl/math: Audit Neon special-case handlers

Prevent inlining in most cases - change to use AOR style (NOINLINE).
---
 pl/math/v_asinhf_2u7.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 39f7989..675b8a8 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -14,12 +14,6 @@
 #define Ln2 v_f32 (0x1.62e43p-1f)
 #define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64).  */
 
-static inline v_f32_t
-handle_special (v_f32_t ax)
-{
-  return V_NAME (log1pf) (ax) + Ln2;
-}
-
 /* Single-precision implementation of vector asinh(x), using vector log1p.
    Worst-case error is 2.66 ULP, at roughly +/-0.25:
    __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3.  */
-- 
cgit v1.2.3


From 2a963bbff4f16998def16ab5c7b1c7ab92f825a8 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue, 6 Dec 2022 10:40:54 +0000
Subject: pl/math: Set fenv flags in Neon asinhf

Routine no longer relies on vector log1pf, as this has to become more
complex to deal with fenv itself. Instead we re-use a log1pf helper
from Neon atanhf which does no special-case handling, leaving it all
up to the main routine. We now just fall back to the scalar routine
for special-case handling. This uncovered a mistake in asinhf's
handling of NaNs, which has been fixed.
---
 pl/math/v_asinhf_2u7.c | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 675b8a8..7bce7ff 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -11,34 +11,45 @@
 
 #define SignMask v_u32 (0x80000000)
 #define One v_f32 (1.0f)
-#define Ln2 v_f32 (0x1.62e43p-1f)
-#define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64).  */
+#define BigBound v_u32 (0x5f800000)  /* asuint(0x1p64).  */
+#define TinyBound v_u32 (0x30800000) /* asuint(0x1p-30).  */
+
+#include "v_log1pf_inline.h"
+
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t special)
+{
+  return v_call_f32 (asinhf, x, y, special);
+}
 
 /* Single-precision implementation of vector asinh(x), using vector log1p.
    Worst-case error is 2.66 ULP, at roughly +/-0.25:
    __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3.  */
 VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
 {
-  v_f32_t ax = v_abs_f32 (x);
-  v_u32_t special = v_cond_u32 (v_as_u32_f32 (ax) >= SpecialBound);
-  v_u32_t sign = v_as_u32_f32 (x) & SignMask;
+  v_u32_t ix = v_as_u32_f32 (x);
+  v_u32_t iax = ix & ~SignMask;
+  v_u32_t sign = ix & SignMask;
+  v_f32_t ax = v_as_f32_u32 (iax);
+  v_u32_t special = v_cond_u32 (iax >= BigBound);
+
+#if WANT_ERRNO
+  /* Sidestep tiny and large values to avoid inadvertently triggering
+     under/overflow.  */
+  special |= v_cond_u32 (iax < TinyBound);
+  if (unlikely (v_any_u32 (special)))
+    ax = v_sel_f32 (special, One, ax);
+#endif
 
   /* asinh(x) = log(x + sqrt(x * x + 1)).
      For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))).  */
   v_f32_t d = One + v_sqrt_f32 (ax * ax + One);
-  v_f32_t y = V_NAME (log1pf) (ax + ax * ax / d);
+  v_f32_t y = log1pf_inline (ax + ax * ax / d);
+  y = v_as_f32_u32 (sign | v_as_u32_f32 (y));
 
   if (unlikely (v_any_u32 (special)))
-    {
-      /* If |x| is too large, we cannot square it at low cost without overflow.
-	 At very large x, asinh(x) ~= log(2x) and log(x) ~= log1p(x), so we
-	 calculate asinh(x) as log1p(x) + log(2).  */
-      v_f32_t y_large = V_NAME (log1pf) (ax) + Ln2;
-      return v_as_f32_u32 (sign
-			   | v_as_u32_f32 (v_sel_f32 (special, y_large, y)));
-    }
-
-  return v_as_f32_u32 (sign | v_as_u32_f32 (y));
+    return specialcase (x, y, special);
+  return y;
 }
 VPCS_ALIAS
 
-- 
cgit v1.2.3


From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:27:31 +0000
Subject: pl/math: Auto-generate mathbench and ulp headers

Instead of maintaining three separate lists of routines, which
are cumbersome and prone to merge conflicts, we provide a new
macro, PL_SIG, which by some preprocessor machinery outputs the
lists in the required format (macro formats have been changed
very slightly to make the generation simpler). Only routines with
simple signatures are handled - binary functions still need
mathbench wrappers defined manually. Likewise, routines with
non-standard references (i.e. powi/powk) still need entries and
wrappers defined manually.
---
 pl/math/v_asinhf_2u7.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 7bce7ff..18a2395 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -6,6 +6,7 @@
 
 #include "v_math.h"
 #include "include/mathlib.h"
+#include "pl_sig.h"
 
 #if V_SUPPORTED
 
@@ -53,4 +54,5 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
 }
 VPCS_ALIAS
 
+PL_SIG (V, F, 1, asinh, -10.0, 10.0)
 #endif
-- 
cgit v1.2.3


From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:27:39 +0000
Subject: pl/math: Move ULP limits to routine source files

Introduces a new set of macros and Make rules for mechanically
generating a list of ULP limits for each routine, to be consumed
by runulp.sh. This removes the need to maintain long lists of
thresholds in runulp.sh.
---
 pl/math/v_asinhf_2u7.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 18a2395..812e28f 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -7,6 +7,7 @@
 #include "v_math.h"
 #include "include/mathlib.h"
 #include "pl_sig.h"
+#include "pl_test.h"
 
 #if V_SUPPORTED
 
@@ -55,4 +56,5 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
 VPCS_ALIAS
 
 PL_SIG (V, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (asinhf), 2.17)
 #endif
-- 
cgit v1.2.3


From d748e1520dd2ff5ad3574bd0827cdd882bf6bed8 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:27:57 +0000
Subject: pl/math: Move fenv expectations out of runulp.sh

Introduces a new macro, similar to how ULP thresholds are now
handled, that emits a list of routines which are expected to
correctly trigger fenv exceptions, to be consumed by runulp.sh.
All scalar routines are expected to do so. A small number of Neon
routines are also expected to, dependent on WANT_ERRNO.
---
 pl/math/v_asinhf_2u7.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 812e28f..32fe773 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -57,4 +57,5 @@ VPCS_ALIAS
 
 PL_SIG (V, F, 1, asinh, -10.0, 10.0)
 PL_TEST_ULP (V_NAME (asinhf), 2.17)
+PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO)
 #endif
-- 
cgit v1.2.3


From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:28:06 +0000
Subject: pl/math: Move test intervals to routine source files

To conclude the work on simplifying the runulp.sh script, a new macro
has been introduced to specify the intervals in which a routine should
be tested in the routine source. This is eventually consumed by
runulp.sh.
---
 pl/math/v_asinhf_2u7.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 32fe773..79bf80f 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -58,4 +58,12 @@ VPCS_ALIAS
 PL_SIG (V, F, 1, asinh, -10.0, 10.0)
 PL_TEST_ULP (V_NAME (asinhf), 2.17)
 PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0, 0x1p-12, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p-12, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 1.0, 0x1p11, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p11, inf, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0, -0x1p-12, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p-12, -1.0, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -1.0, -0x1p11, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p11, -inf, 20000)
 #endif
-- 
cgit v1.2.3


From d05594e6718e6d86959c823bea4f019dea878bcb Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon, 19 Dec 2022 12:34:51 +0000
Subject: pl/math: Replace WANT_ERRNO with WANT_SIMD_EXCEPT for Neon fenv

We were previously misusing the WANT_ERRNO build flag. This is now
replaced everywhere appropriate with WANT_SIMD_EXCEPT. A small number
of vector routines get fp exceptions right with no modification - the
tests have been updated to track this.
---
 pl/math/v_asinhf_2u7.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 79bf80f..4710a22 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -35,7 +35,7 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
   v_f32_t ax = v_as_f32_u32 (iax);
   v_u32_t special = v_cond_u32 (iax >= BigBound);
 
-#if WANT_ERRNO
+#if WANT_SIMD_EXCEPT
   /* Sidestep tiny and large values to avoid inadvertently triggering
      under/overflow.  */
   special |= v_cond_u32 (iax < TinyBound);
@@ -57,7 +57,7 @@ VPCS_ALIAS
 
 PL_SIG (V, F, 1, asinh, -10.0, 10.0)
 PL_TEST_ULP (V_NAME (asinhf), 2.17)
-PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO)
+PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_SIMD_EXCEPT)
 PL_TEST_INTERVAL (V_NAME (asinhf), 0, 0x1p-12, 40000)
 PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p-12, 1.0, 40000)
 PL_TEST_INTERVAL (V_NAME (asinhf), 1.0, 0x1p11, 40000)
-- 
cgit v1.2.3


From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Fri, 6 Jan 2023 09:10:57 +0000
Subject: pl/math: Update copyright years

All files in pl/math updated to 2023.
---
 pl/math/v_asinhf_2u7.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'pl/math/v_asinhf_2u7.c')

diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
index 4710a22..9d8c8a9 100644
--- a/pl/math/v_asinhf_2u7.c
+++ b/pl/math/v_asinhf_2u7.c
@@ -1,6 +1,7 @@
 /*
  * Single-precision vector asinh(x) function.
- * Copyright (c) 2022, Arm Limited.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-- 
cgit v1.2.3