From 8a644bf15812edaba38b41ca142e8e7e328e7918 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Wed, 30 Nov 2022 09:42:41 +0000
Subject: pl/math: Add scalar and vector/Neon tanhf

Both routines use simplified inline versions of expm1f, and are
accurate to 2.6 ULP.
---
 pl/math/tanhf_2u6.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 pl/math/tanhf_2u6.c

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
new file mode 100644
index 0000000..9db2533
--- /dev/null
+++ b/pl/math/tanhf_2u6.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision tanh(x) function.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+
+#define BoringBound                                                            \
+  0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for        \
+		negative).  */
+#define AbsMask 0x7fffffff
+#define One 0x3f800000
+
+#define Shift (0x1.8p23f)
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+
+#define C(i) __expm1f_poly[i]
+
+static inline float
+expm1f_inline (float x)
+{
+  /* Helper routine for calculating exp(x) - 1.
+     Copied from expm1f_1u6.c, with several simplifications:
+     - No special-case handling for tiny or special values, instead return early
+       from the main routine.
+     - No special handling for large values:
+       - No early return for infinity.
+       - Simpler combination of p and t in final stage of algorithm.
+       - |i| < 27, so can calculate t by simpler shift-and-add, instead of
+	 ldexpf (same as vector algorithm).  */
+
+  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
+  float j = fmaf (InvLn2, x, Shift) - Shift;
+  int32_t i = j;
+  float f = fmaf (j, -Ln2hi, x);
+  f = fmaf (j, -Ln2lo, f);
+
+  /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+     Uses Estrin scheme, where the main expm1f routine uses Horner.  */
+  float f2 = f * f;
+  float p_01 = fmaf (f, C (1), C (0));
+  float p_23 = fmaf (f, C (3), C (2));
+  float p = fmaf (f2, p_23, p_01);
+  p = fmaf (f2 * f2, C (4), p);
+  p = fmaf (f2, p, f);
+
+  /* t = 2^i.  */
+  float t = asfloat ((i << 23) + One);
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return fmaf (p, t, t - 1);
+}
+
+/* Approximation for single-precision tanh(x), using a simplified version of
+   expm1f. The maximum error is 2.58 ULP:
+   tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5
+		      want 0x1.f9ba08p-5.  */
+float
+tanhf (float x)
+{
+  uint32_t ix = asuint (x);
+  uint32_t iax = ix & AbsMask;
+  uint32_t sign = ix & ~AbsMask;
+
+  if (unlikely (iax > BoringBound))
+    {
+      if (iax > 0x7f800000)
+	return __math_invalidf (x);
+      return asfloat (One | sign);
+    }
+
+  if (unlikely (iax < 0x34000000))
+    return x;
+
+  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
+  float q = expm1f_inline (2 * x);
+  return q / (q + 2);
+}
-- 
cgit v1.2.3


From 0d3c3cd35440d224ddbcd1496b48835443f4c7c1 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon, 5 Dec 2022 11:56:33 +0000
Subject: pl/math: Avoid UB in scalar tanhf

The ldexp shortcut was left-shifting a signed value. We now bias the
exponent first, will allows the shift to be done on an unsigned value.
---
 pl/math/tanhf_2u6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
index 9db2533..145f437 100644
--- a/pl/math/tanhf_2u6.c
+++ b/pl/math/tanhf_2u6.c
@@ -48,7 +48,7 @@ expm1f_inline (float x)
   p = fmaf (f2, p, f);
 
   /* t = 2^i.  */
-  float t = asfloat ((i << 23) + One);
+  float t = asfloat ((uint32_t) (i + 127) << 23);
   /* expm1(x) ~= p * t + (t - 1).  */
   return fmaf (p, t, t - 1);
 }
-- 
cgit v1.2.3


From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:27:31 +0000
Subject: pl/math: Auto-generate mathbench and ulp headers

Instead of maintaining three separate lists of routines, which
are cumbersome and prone to merge conflicts, we provide a new
macro, PL_SIG, which by some preprocessor machinery outputs the
lists in the required format (macro formats have been changed
very slightly to make the generation simpler). Only routines with
simple signatures are handled - binary functions still need
mathbench wrappers defined manually. As well, routines with
non-standard references (i.e. powi/powk) still need entries and
wrappers manually defined.
---
 pl/math/tanhf_2u6.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
index 145f437..90f561f 100644
--- a/pl/math/tanhf_2u6.c
+++ b/pl/math/tanhf_2u6.c
@@ -5,6 +5,7 @@
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
+#include "pl_sig.h"
 
 #define BoringBound                                                            \
   0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for        \
@@ -78,3 +79,5 @@ tanhf (float x)
   float q = expm1f_inline (2 * x);
   return q / (q + 2);
 }
+
+PL_SIG (S, F, 1, tanh, -10.0, 10.0)
-- 
cgit v1.2.3


From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:27:39 +0000
Subject: pl/math: Move ULP limits to routine source files

Introduces a new set of macros and Make rules for mechanically
generating a list of ULP limits for each routine, to be consumed
by runulp.sh. This removes the need to maintain long lists of
thresholds in runulp.sh.
---
 pl/math/tanhf_2u6.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
index 90f561f..e6cbbd0 100644
--- a/pl/math/tanhf_2u6.c
+++ b/pl/math/tanhf_2u6.c
@@ -6,6 +6,7 @@
  */
 #include "math_config.h"
 #include "pl_sig.h"
+#include "pl_test.h"
 
 #define BoringBound                                                            \
   0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for        \
@@ -81,3 +82,4 @@ tanhf (float x)
 }
 
 PL_SIG (S, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (tanhf, 2.09)
-- 
cgit v1.2.3


From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu, 15 Dec 2022 13:28:06 +0000
Subject: pl/math: Move test intervals to routine source files

To conclude the work on simplifying the runulp.sh script, a new macro
has been introduced to specify the intervals in which a routine should
be tested in the routine source. This is eventually consumed by
runulp.sh.
---
 pl/math/tanhf_2u6.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
index e6cbbd0..745e5e3 100644
--- a/pl/math/tanhf_2u6.c
+++ b/pl/math/tanhf_2u6.c
@@ -83,3 +83,9 @@ tanhf (float x)
 
 PL_SIG (S, F, 1, tanh, -10.0, 10.0)
 PL_TEST_ULP (tanhf, 2.09)
+PL_TEST_INTERVAL (tanhf, 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (tanhf, -0, -0x1p-23, 1000)
+PL_TEST_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (tanhf, -0x1p-23, -0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (tanhf, 0x1.205966p+3, inf, 100)
+PL_TEST_INTERVAL (tanhf, -0x1.205966p+3, -inf, 100)
-- 
cgit v1.2.3


From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001
From: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Fri, 6 Jan 2023 09:10:57 +0000
Subject: pl/math: Update copyright years

All files in pl/math updated to 2023.
---
 pl/math/tanhf_2u6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'pl/math/tanhf_2u6.c')

diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
index 745e5e3..76e54a4 100644
--- a/pl/math/tanhf_2u6.c
+++ b/pl/math/tanhf_2u6.c
@@ -1,7 +1,7 @@
 /*
  * Single-precision tanh(x) function.
  *
- * Copyright (c) 2022, Arm Limited.
+ * Copyright (c) 2022-2023, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 #include "math_config.h"
-- 
cgit v1.2.3