pl/math: Use single-precision fma in atan2f

The polynomial was mistakenly using double-precision fma, where single is sufficiently accurate. New underflow special cases have been handled accordingly.
author: Joe Ramsay <Joe.Ramsay@arm.com> 2022-06-22 10:00:21 +0100
committer: Joe Ramsay <joe.ramsay@arm.com> 2022-06-22 10:00:21 +0100
commit: 90510ecbb921d79799e7d62eda43696f25b8e51e (patch)
tree: 74f98c989b7448f55c5e9af72034caa6cab0cadf
parent: 7f8252e7a0302c08f88a30fedebe7333662f1c01 (diff)
download: arm-optimized-routines-90510ecbb921d79799e7d62eda43696f25b8e51e.tar.gz
2 files changed, 30 insertions, 13 deletions
diff --git a/pl/math/atan2f_3u.c b/pl/math/atan2f_3u.c
index 7d83b67..d2f1749 100644
--- a/pl/math/atan2f_3u.c
+++ b/pl/math/atan2f_3u.c
@@ -15,6 +15,12 @@
 #define PiOver4 (0x1.921fb6p-1f)
 #define SignMask (0x80000000)
 
+/* We calculate atan2f by P(n/d), where n and d are similar to the input
+   arguments, and P is a polynomial. The polynomial may underflow.
+   POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
+   for which P underflows, and is used to special-case such inputs.  */
+#define POLY_UFLOW_BOUND 24
+
 static inline int32_t
 biased_exponent (float f)
 {
@@ -65,10 +71,10 @@ atan2f (float y, float x)
   int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
 
   /* Special case for (x, y) either on or very close to the x axis. Either y =
-     0, or y is tiny and x is huge (difference in exponents >= 126). In the
-     second case, we only want to use this special case when x is negative (i.e.
-     quadrants 2 or 3).  */
-  if (unlikely (iay == 0 || (exp_diff >= 126 && m >= 2)))
+     0, or y is tiny and x is huge (difference in exponents >=
+     POLY_UFLOW_BOUND). In the second case, we only want to use this special
+     case when x is negative (i.e. quadrants 2 or 3).  */
+  if (unlikely (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
     {
       switch (m)
 	{
@@ -82,8 +88,9 @@ atan2f (float y, float x)
 	}
     }
   /* Special case for (x, y) either on or very close to the y axis. Either x =
-     0, or x is tiny and y is huge (difference in exponents >= 126).  */
-  if (unlikely (iax == 0 || exp_diff <= -126))
+     0, or x is tiny and y is huge (difference in exponents >=
+     POLY_UFLOW_BOUND).  */
+  if (unlikely (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
     return sign_y ? -PiOver2 : PiOver2;
 
   /* x is INF.  */
@@ -134,12 +141,22 @@ atan2f (float y, float x)
   float d = pred_aygtax ? ay : ax;
   float z = n / d;
 
-  /* Work out the correct shift.  */
-  float shift = sign_x ? -2.0f : 0.0f;
-  shift = pred_aygtax ? shift + 1.0f : shift;
-  shift *= PiOver2;
+  float ret;
+  if (unlikely (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
+    {
+      /* If (x, y) is very close to x axis and x is positive, the polynomial
+	 will underflow and evaluate to z.  */
+      ret = z;
+    }
+  else
+    {
+      /* Work out the correct shift.  */
+      float shift = sign_x ? -2.0f : 0.0f;
+      shift = pred_aygtax ? shift + 1.0f : shift;
+      shift *= PiOver2;
 
-  float ret = eval_poly (z, z, shift);
+      ret = eval_poly (z, z, shift);
+    }
 
   /* Account for the sign of x and y.  */
   return asfloat (asuint (ret) ^ sign_xy);
diff --git a/pl/math/atanf_common.h b/pl/math/atanf_common.h
index 55cee89..436b88b 100644
--- a/pl/math/atanf_common.h
+++ b/pl/math/atanf_common.h
@@ -21,8 +21,8 @@
 
 #else
 
-#define FLT_T double
-#define FMA fma
+#define FLT_T float
+#define FMA fmaf
 #define P(i) __atanf_poly_data.poly[i]
 
 #endif
author	Joe Ramsay <Joe.Ramsay@arm.com>	2022-06-22 10:00:21 +0100
committer	Joe Ramsay <joe.ramsay@arm.com>	2022-06-22 10:00:21 +0100
commit	90510ecbb921d79799e7d62eda43696f25b8e51e (patch)
tree	74f98c989b7448f55c5e9af72034caa6cab0cadf
parent	7f8252e7a0302c08f88a30fedebe7333662f1c01 (diff)
download	arm-optimized-routines-90510ecbb921d79799e7d62eda43696f25b8e51e.tar.gz