Upgrade arm-optimized-routines to v20.11 am: 59371d5a3d

Original change: https://android-review.googlesource.com/c/platform/external/arm-optimized-routines/+/1515050 Change-Id: I9c99706d82ceca15395995855d46e3e7d8c23ace
author: Haibo Huang <hhb@google.com> 2020-12-04 04:11:41 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2020-12-04 04:11:41 +0000
commit: a1ca34202964a49508d49854c8500f9e09b042ed (patch)
tree: 559634645b261c500bdd7b172b2ecf4f87e6dd78
parent: 0c47e0f8e36cd0cdeefb5f56ccfce78f7931f75d (diff)
parent: 59371d5a3dfd27747d9d29ab7a7d3470e157d9ee (diff)
download: arm-optimized-routines-a1ca34202964a49508d49854c8500f9e09b042ed.tar.gz
15 files changed, 585 insertions, 5 deletions
diff --git a/Android.bp b/Android.bp
index ef75290..78f1fdc 100755
--- a/Android.bp
+++ b/Android.bp
@@ -48,6 +48,15 @@ cc_defaults {
 cc_library {
     name: "libarm-optimized-routines-math",
     defaults: ["libarm-optimized-routines-defaults"],
+    exclude_srcs: [
+        // Provided by:
+        // bionic/libm/upstream-freebsd/lib/msun/src/s_erf.c
+        // bionic/libm/upstream-freebsd/lib/msun/src/s_erff.c
+        "math/erf.c",
+        "math/erf_data.c",
+        "math/erff.c",
+        "math/erff_data.c",
+    ],
     srcs: [
         "math/*.c",
     ],
diff --git a/METADATA b/METADATA
index a8d7b6f..76793c4 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@ third_party {
     type: GIT
     value: "https://github.com/ARM-software/optimized-routines.git"
   }
-  version: "0f4ae0c5b561de25acb10130fd5e473ec038f89d"
+  version: "v20.11"
   license_type: NOTICE
   last_upgrade_date {
     year: 2020
-    month: 10
-    day: 28
+    month: 11
+    day: 30
   }
 }
diff --git a/README b/README
index 041aa1a..ae465e9 100644
--- a/README
+++ b/README
@@ -9,7 +9,7 @@ contributor-agreement.pdf. This is needed so upstreaming code
 to projects that require copyright assignment is possible.
 
 Regular quarterly releases are tagged as vYY.MM, the latest
-release is v20.08.
+release is v20.11.
 
 Source code layout:
 
diff --git a/math/erf.c b/math/erf.c
new file mode 100644
index 0000000..12d7e51
--- /dev/null
+++ b/math/erf.c
@@ -0,0 +1,244 @@
+/*
+ * Double-precision erf(x) function.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+#include <math.h>
+#include <stdint.h>
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
+#define C 0x1.b0ac16p-1
+#define PA __erf_data.erf_poly_A
+#define NA __erf_data.erf_ratio_N_A
+#define DA __erf_data.erf_ratio_D_A
+#define NB __erf_data.erf_ratio_N_B
+#define DB __erf_data.erf_ratio_D_B
+#define PC __erf_data.erfc_poly_C
+#define PD __erf_data.erfc_poly_D
+#define PE __erf_data.erfc_poly_E
+#define PF __erf_data.erfc_poly_F
+
+/* Top 32 bits of a double.  */
+static inline uint32_t
+top32 (double x)
+{
+  return asuint64 (x) >> 32;
+}
+
+/* Fast erf implementation using a mix of
+   rational and polynomial approximations.
+   Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.  */
+double
+erf (double x)
+{
+  /* Get top word and sign.  */
+  uint32_t ix = top32 (x);
+  uint32_t ia = ix & 0x7fffffff;
+  uint32_t sign = ix >> 31;
+
+  /* Normalized and subnormal cases */
+  if (ia < 0x3feb0000)
+    { /* a = |x| < 0.84375.  */
+
+      if (ia < 0x3e300000)
+	{ /* a < 2^(-28).  */
+	  if (ia < 0x00800000)
+	    { /* a < 2^(-1015).  */
+	      double y =  fma (TwoOverSqrtPiMinusOne, x, x);
+	      return check_uflow (y);
+	    }
+	  return x + TwoOverSqrtPiMinusOne * x;
+	}
+
+      double x2 = x * x;
+
+      if (ia < 0x3fe00000)
+	{ /* a < 0.5  - Use polynomial approximation.  */
+	  double r1 = fma (x2, PA[1], PA[0]);
+	  double r2 = fma (x2, PA[3], PA[2]);
+	  double r3 = fma (x2, PA[5], PA[4]);
+	  double r4 = fma (x2, PA[7], PA[6]);
+	  double r5 = fma (x2, PA[9], PA[8]);
+	  double x4 = x2 * x2;
+	  double r = r5;
+	  r = fma (x4, r, r4);
+	  r = fma (x4, r, r3);
+	  r = fma (x4, r, r2);
+	  r = fma (x4, r, r1);
+	  return fma (r, x, x); /* This fma is crucial for accuracy.  */
+	}
+      else
+	{ /* 0.5 <= a < 0.84375 - Use rational approximation.  */
+	  double x4, x8, r1n, r2n, r1d, r2d, r3d;
+
+	  r1n = fma (x2, NA[1], NA[0]);
+	  x4 = x2 * x2;
+	  r2n = fma (x2, NA[3], NA[2]);
+	  x8 = x4 * x4;
+	  r1d = fma (x2, DA[0], 1.0);
+	  r2d = fma (x2, DA[2], DA[1]);
+	  r3d = fma (x2, DA[4], DA[3]);
+	  double P = r1n + x4 * r2n + x8 * NA[4];
+	  double Q = r1d + x4 * r2d + x8 * r3d;
+	  return fma (P / Q, x, x);
+	}
+    }
+  else if (ia < 0x3ff40000)
+    { /* 0.84375 <= |x| < 1.25.  */
+      double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
+      double a = fabs (x) - 1.0;
+      r1n = fma (a, NB[1], NB[0]);
+      a2 = a * a;
+      r1d = fma (a, DB[0], 1.0);
+      a4 = a2 * a2;
+      r2n = fma (a, NB[3], NB[2]);
+      a6 = a4 * a2;
+      r2d = fma (a, DB[2], DB[1]);
+      r3n = fma (a, NB[5], NB[4]);
+      r3d = fma (a, DB[4], DB[3]);
+      r4n = NB[6];
+      r4d = DB[5];
+      double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
+      double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
+      if (sign)
+	return -C - P / Q;
+      else
+	return C + P / Q;
+    }
+  else if (ia < 0x40000000)
+    { /* 1.25 <= |x| < 2.0.  */
+      double a = fabs (x);
+      a = a - 1.25;
+
+      double r1 = fma (a, PC[1], PC[0]);
+      double r2 = fma (a, PC[3], PC[2]);
+      double r3 = fma (a, PC[5], PC[4]);
+      double r4 = fma (a, PC[7], PC[6]);
+      double r5 = fma (a, PC[9], PC[8]);
+      double r6 = fma (a, PC[11], PC[10]);
+      double r7 = fma (a, PC[13], PC[12]);
+      double r8 = fma (a, PC[15], PC[14]);
+
+      double a2 = a * a;
+
+      double r = r8;
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x400a0000)
+    { /* 2 <= |x| < 3.25.  */
+      double a = fabs (x);
+      a = fma (0.5, a, -1.0);
+
+      double r1 = fma (a, PD[1], PD[0]);
+      double r2 = fma (a, PD[3], PD[2]);
+      double r3 = fma (a, PD[5], PD[4]);
+      double r4 = fma (a, PD[7], PD[6]);
+      double r5 = fma (a, PD[9], PD[8]);
+      double r6 = fma (a, PD[11], PD[10]);
+      double r7 = fma (a, PD[13], PD[12]);
+      double r8 = fma (a, PD[15], PD[14]);
+      double r9 = fma (a, PD[17], PD[16]);
+
+      double a2 = a * a;
+
+      double r = r9;
+      r = fma (a2, r, r8);
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x40100000)
+    { /* 3.25 <= |x| < 4.0.  */
+      double a = fabs (x);
+      a = a - 3.25;
+
+      double r1 = fma (a, PE[1], PE[0]);
+      double r2 = fma (a, PE[3], PE[2]);
+      double r3 = fma (a, PE[5], PE[4]);
+      double r4 = fma (a, PE[7], PE[6]);
+      double r5 = fma (a, PE[9], PE[8]);
+      double r6 = fma (a, PE[11], PE[10]);
+      double r7 = fma (a, PE[13], PE[12]);
+
+      double a2 = a * a;
+
+      double r = r7;
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x4017a000)
+    { /* 4 <= |x| < 5.90625.  */
+      double a = fabs (x);
+      a = fma (0.5, a, -2.0);
+
+      double r1 = fma (a, PF[1], PF[0]);
+      double r2 = fma (a, PF[3], PF[2]);
+      double r3 = fma (a, PF[5], PF[4]);
+      double r4 = fma (a, PF[7], PF[6]);
+      double r5 = fma (a, PF[9], PF[8]);
+      double r6 = fma (a, PF[11], PF[10]);
+      double r7 = fma (a, PF[13], PF[12]);
+      double r8 = fma (a, PF[15], PF[14]);
+      double r9 = PF[16];
+
+      double a2 = a * a;
+
+      double r = r9;
+      r = fma (a2, r, r8);
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else
+    {
+      /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1.  */
+      if (unlikely (ia >= 0x7ff00000))
+	return (double) (1.0 - (sign << 1)) + 1.0 / x;
+
+      if (sign)
+	return -1.0;
+      else
+	return 1.0;
+    }
+}
diff --git a/math/erf_data.c b/math/erf_data.c
new file mode 100644
index 0000000..807875b
--- /dev/null
+++ b/math/erf_data.c
@@ -0,0 +1,85 @@
+/*
+ * Shared data between erf and erfc.
+ *
+ * Copyright (c) 2019-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+/*
+Minimax approximation of erf
+*/
+const struct erf_data __erf_data = {
+.erf_poly_A = {
+#if ERF_POLY_A_NCOEFFS == 10
+0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
+-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
+0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
+-0x1.18c47fd143c5ep-23
+#endif
+},
+/* Rational approximation on [0x1p-28, 0.84375] */
+.erf_ratio_N_A = {
+0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
+-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
+},
+.erf_ratio_D_A = {
+0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
+0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
+},
+/* Rational approximation on [0.84375, 1.25] */
+.erf_ratio_N_B = {
+-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
+0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
+-0x1.1bf380a96073fp-9
+},
+.erf_ratio_D_B = {
+0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
+0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
+},
+.erfc_poly_C = {
+#if ERFC_POLY_C_NCOEFFS == 16
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
+0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
+-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
+-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
+-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
+0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
+-0x1.578c9e375d37p-19
+#endif
+},
+.erfc_poly_D = {
+#if ERFC_POLY_D_NCOEFFS == 18
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
+0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
+-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
+-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
+0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
+-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
+0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
+#endif
+},
+.erfc_poly_E = {
+#if ERFC_POLY_E_NCOEFFS == 14
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
+0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
+-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
+0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
+-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
+-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
+#endif
+},
+.erfc_poly_F = {
+#if ERFC_POLY_F_NCOEFFS == 17
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
+0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
+-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
+0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
+-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
+0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
+-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
+#endif
+}
+};
+
diff --git a/math/erff.c b/math/erff.c
new file mode 100644
index 0000000..a58e825
--- /dev/null
+++ b/math/erff.c
@@ -0,0 +1,104 @@
+/*
+ * Single-precision erf(x) function.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
+#define A __erff_data.erff_poly_A
+#define B __erff_data.erff_poly_B
+
+/* Top 12 bits of a float.  */
+static inline uint32_t
+top12 (float x)
+{
+  return asuint (x) >> 20;
+}
+
+/* Efficient implementation of erff
+   using either a pure polynomial approximation or
+   the exponential of a polynomial.
+   Worst-case error is 1.09ulps at 0x1.c111acp-1.  */
+float
+erff (float x)
+{
+  float r, x2, u;
+
+  /* Get top word.  */
+  uint32_t ix = asuint (x);
+  uint32_t sign = ix >> 31;
+  uint32_t ia12 = top12 (x) & 0x7ff;
+
+  /* Limit of both intervals is 0.875 for performance reasons but coefficients
+     computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
+     from 0.94 to 1.1ulps.  */
+  if (ia12 < 0x3f6)
+    { /* a = |x| < 0.875.  */
+
+      /* Tiny and subnormal cases.  */
+      if (unlikely (ia12 < 0x318))
+	{ /* |x| < 2^(-28).  */
+	  if (unlikely (ia12 < 0x040))
+	    { /* |x| < 2^(-119).  */
+	      float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
+	      return check_uflowf (y);
+	    }
+	  return x + TwoOverSqrtPiMinusOne * x;
+	}
+
+      x2 = x * x;
+
+      /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2).  */
+      r = A[5];
+      r = fmaf (r, x2, A[4]);
+      r = fmaf (r, x2, A[3]);
+      r = fmaf (r, x2, A[2]);
+      r = fmaf (r, x2, A[1]);
+      r = fmaf (r, x2, A[0]);
+      r = fmaf (r, x, x);
+    }
+  else if (ia12 < 0x408)
+    { /* |x| < 4.0 - Use a custom Estrin scheme.  */
+
+      float a = fabsf (x);
+      /* Start with Estrin scheme on high order (small magnitude) coefficients.  */
+      r = fmaf (B[6], a, B[5]);
+      u = fmaf (B[4], a, B[3]);
+      x2 = x * x;
+      r = fmaf (r, x2, u);
+      /* Then switch to pure Horner scheme.  */
+      r = fmaf (r, a, B[2]);
+      r = fmaf (r, a, B[1]);
+      r = fmaf (r, a, B[0]);
+      r = fmaf (r, a, a);
+      /* Single precision exponential with ~0.5ulps,
+	 ensures erff has max. rel. error
+	 < 1ulp on [0.921875, 4.0],
+	 < 1.1ulps on [0.875, 4.0].  */
+      r = expf (-r);
+      /* Explicit copysign (calling copysignf increases latency).  */
+      if (sign)
+	r = -1.0f + r;
+      else
+	r = 1.0f - r;
+    }
+  else
+    { /* |x| >= 4.0.  */
+
+      /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1.  */
+      if (unlikely (ia12 >= 0x7f8))
+	return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
+
+      /* Explicit copysign (calling copysignf increases latency).  */
+      if (sign)
+	r = -1.0f;
+      else
+	r = 1.0f;
+    }
+  return r;
+}
diff --git a/math/erff_data.c b/math/erff_data.c
new file mode 100644
index 0000000..fa6b1ef
--- /dev/null
+++ b/math/erff_data.c
@@ -0,0 +1,22 @@
+/*
+ * Data for approximation of erff.
+ *
+ * Copyright (c) 2019-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+/* Minimax approximation of erff. */
+const struct erff_data __erff_data = {
+.erff_poly_A = {
+0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
+-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
+},
+.erff_poly_B = {
+0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
+-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
+0x1.222900p-16f
+}
+};
+
diff --git a/math/math_config.h b/math/math_config.h
index 85fc584..09755ad 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -298,6 +298,24 @@ check_uflow (double x)
   return WANT_ERRNO ? __math_check_uflow (x) : x;
 }
 
+/* Check if the result overflowed to infinity.  */
+HIDDEN float __math_check_oflowf (float);
+/* Check if the result underflowed to 0.  */
+HIDDEN float __math_check_uflowf (float);
+
+/* Check if the result overflowed to infinity.  */
+static inline float
+check_oflowf (float x)
+{
+  return WANT_ERRNO ? __math_check_oflowf (x) : x;
+}
+
+/* Check if the result underflowed to 0.  */
+static inline float
+check_uflowf (float x)
+{
+  return WANT_ERRNO ? __math_check_uflowf (x) : x;
+}
 
 /* Shared between expf, exp2f and powf.  */
 #define EXP2F_TABLE_BITS 5
@@ -416,4 +434,29 @@ extern const struct pow_log_data
   struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
 } __pow_log_data HIDDEN;
 
+extern const struct erff_data
+{
+  float erff_poly_A[6];
+  float erff_poly_B[7];
+} __erff_data HIDDEN;
+
+#define ERF_POLY_A_ORDER 19
+#define ERF_POLY_A_NCOEFFS 10
+#define ERFC_POLY_C_NCOEFFS 16
+#define ERFC_POLY_D_NCOEFFS 18
+#define ERFC_POLY_E_NCOEFFS 14
+#define ERFC_POLY_F_NCOEFFS 17
+extern const struct erf_data
+{
+  double erf_poly_A[ERF_POLY_A_NCOEFFS];
+  double erf_ratio_N_A[5];
+  double erf_ratio_D_A[5];
+  double erf_ratio_N_B[7];
+  double erf_ratio_D_B[6];
+  double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
+  double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
+  double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
+  double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
+} __erf_data HIDDEN;
+
 #endif
diff --git a/math/math_errf.c b/math/math_errf.c
index 07154c5..771d982 100644
--- a/math/math_errf.c
+++ b/math/math_errf.c
@@ -64,3 +64,17 @@ __math_invalidf (float x)
   float y = (x - x) / (x - x);
   return isnan (x) ? y : with_errnof (y, EDOM);
 }
+
+/* Check result and set errno if necessary.  */
+
+HIDDEN float
+__math_check_uflowf (float y)
+{
+  return y == 0.0f ? with_errnof (y, ERANGE) : y;
+}
+
+HIDDEN float
+__math_check_oflowf (float y)
+{
+  return isinf (y) ? with_errnof (y, ERANGE) : y;
+}
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 33ceda3..2e8aaf8 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -248,6 +248,7 @@ D (log2, 0.999, 1.001)
 {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
 D (xpow, 0.01, 11.1)
 D (ypow, -9.9, 9.9)
+D (erf, -6.0, 6.0)
 
 F (dummyf, 1.0, 2.0)
 F (expf, -9.9, 9.9)
@@ -275,6 +276,7 @@ F (cosf, -3.1, 3.1)
 F (cosf, 3.3, 33.3)
 F (cosf, 100, 1000)
 F (cosf, 1e6, 1e32)
+F (erff, -4.0, 4.0)
 #if WANT_VMATH
 D (__s_sin, -3.1, 3.1)
 D (__s_cos, -3.1, 3.1)
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index a8c391b..1d87a81 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -72,6 +72,16 @@ t pow  0x1.ffffffffffff0p-1  0x1.0000000000008p0 x 0x1p60 0x1p68 50000
 t pow  0x1.ffffffffff000p-1  0x1p0 x 0x1p50 0x1p52 50000
 t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000
 
+L=1.0
+Ldir=0.9
+t erf  0 0xffff000000000000 10000
+t erf  0x1p-1022  0x1p-26   40000
+t erf  -0x1p-1022 -0x1p-26  40000
+t erf  0x1p-26    0x1p3     40000
+t erf  -0x1p-26  -0x1p3     40000
+t erf  0         inf        40000
+Ldir=0.5
+
 L=0.01
 t expf  0    0xffff0000    10000
 t expf  0x1p-14   0x1p8    50000
@@ -119,6 +129,17 @@ t powf  0x1p-70 0x1p70  x  0x1p-1 0x1p1   50000
 t powf  0x1p-70 0x1p70  x  -0x1p-1 -0x1p1 50000
 t powf  0x1.ep-1 0x1.1p0 x  0x1p8 0x1p14  50000
 t powf  0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
+
+L=0.6
+Ldir=0.9
+t erff  0      0xffff0000 10000
+t erff  0x1p-127  0x1p-26 40000
+t erff -0x1p-127 -0x1p-26 40000
+t erff  0x1p-26   0x1p3   40000
+t erff -0x1p-26  -0x1p3   40000
+t erff  0         inf     40000
+Ldir=0.5
+
 done
 
 # vector functions
diff --git a/math/test/testcases/directed/erf.tst b/math/test/testcases/directed/erf.tst
new file mode 100644
index 0000000..7fa4d18
--- /dev/null
+++ b/math/test/testcases/directed/erf.tst
@@ -0,0 +1,17 @@
+; erf.tst - Directed test cases for erf
+;
+; Copyright (c) 2007-2020, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=erf op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erf op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erf op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
+func=erf op1=fff00000.00000000 result=bff00000.00000000 errno=0
+func=erf op1=00000000.00000000 result=00000000.00000000 errno=ERANGE
+func=erf op1=80000000.00000000 result=80000000.00000000 errno=ERANGE
+func=erf op1=00000000.00000001 result=00000000.00000001 errno=0 status=ux
+func=erf op1=80000000.00000001 result=80000000.00000001 errno=0 status=ux
+func=erf op1=3ff00000.00000000 result=3feaf767.a741088a.c6d errno=0
+func=erf op1=bff00000.00000000 result=bfeaf767.a741088a.c6d errno=0
diff --git a/math/test/testcases/directed/erff.tst b/math/test/testcases/directed/erff.tst
new file mode 100644
index 0000000..d05b7b1
--- /dev/null
+++ b/math/test/testcases/directed/erff.tst
@@ -0,0 +1,17 @@
+; erff.tst
+;
+; Copyright (c) 2007-2020, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=erff op1=7fc00001 result=7fc00001 errno=0
+func=erff op1=ffc00001 result=7fc00001 errno=0
+func=erff op1=7f800001 result=7fc00001 errno=0 status=i
+func=erff op1=ff800001 result=7fc00001 errno=0 status=i
+func=erff op1=7f800000 result=3f800000 errno=0
+func=erff op1=ff800000 result=bf800000 errno=0
+func=erff op1=00000000 result=00000000 errno=ERANGE
+func=erff op1=80000000 result=80000000 errno=ERANGE
+func=erff op1=00000001 result=00000001 errno=0 status=ux
+func=erff op1=80000001 result=80000001 errno=0 status=ux
+func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
+func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
diff --git a/math/test/ulp.c b/math/test/ulp.c
index 371567a..05a5321 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -331,11 +331,13 @@ static const struct fun fun[] = {
  F1 (log)
  F1 (log2)
  F2 (pow)
+ F1 (erf)
  D1 (exp)
  D1 (exp2)
  D1 (log)
  D1 (log2)
  D2 (pow)
+ D1 (erf)
 #if WANT_VMATH
  F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
  F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
diff --git a/networking/test/chksum.c b/networking/test/chksum.c
index 50722a4..41b9812 100644
--- a/networking/test/chksum.c
+++ b/networking/test/chksum.c
@@ -1,7 +1,7 @@
 /*
  * Ones' complement checksum test & benchmark
  *
- * Copyright 2016-2020 ARM Limited
+ * Copyright (c) 2016-2020, Arm Limited.
  * SPDX-License-Identifier: MIT
  */
author	Haibo Huang <hhb@google.com>	2020-12-04 04:11:41 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2020-12-04 04:11:41 +0000
commit	a1ca34202964a49508d49854c8500f9e09b042ed (patch)
tree	559634645b261c500bdd7b172b2ecf4f87e6dd78
parent	0c47e0f8e36cd0cdeefb5f56ccfce78f7931f75d (diff)
parent	59371d5a3dfd27747d9d29ab7a7d3470e157d9ee (diff)
download	arm-optimized-routines-a1ca34202964a49508d49854c8500f9e09b042ed.tar.gz