diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2022-07-12 09:13:09 +0100 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-07-12 09:13:09 +0100 |
commit | ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c (patch) | |
tree | 4ce195532f2d8bfafa38425b7b7ea51be0e7104d | |
parent | d9a816bb547d3acc29e343c85dc568ff86add1c9 (diff) | |
download | arm-optimized-routines-ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c.tar.gz |
pl/math: Add scalar asinhf
asinhf depends on logf, which has been copied over from the main math
directory. The only modification was to change the name logf to
optr_aor_log_f32 to resolve any ambiguity with libm.
Worst-case error is about 3.4 ULP, at very large input. There are 4
intervals with slightly different error behaviour, as follows:
Interval Worst-case accuracy (ulp)
|x| < 2^-12 0
|x| < 1 1.3
|x| < sqrt(FLT_MAX) 2.0
|x| < infinity 3.4
-rw-r--r-- | pl/math/asinhf_3u5.c | 77 | ||||
-rw-r--r-- | pl/math/asinhf_data.c | 14 | ||||
-rw-r--r-- | pl/math/include/mathlib.h | 1 | ||||
-rw-r--r-- | pl/math/logf.c | 75 | ||||
-rw-r--r-- | pl/math/math_config.h | 6 | ||||
-rw-r--r-- | pl/math/test/mathbench_funcs.h | 1 | ||||
-rwxr-xr-x | pl/math/test/runulp.sh | 6 | ||||
-rw-r--r-- | pl/math/test/testcases/directed/asinhf.tst | 18 | ||||
-rw-r--r-- | pl/math/test/ulp_funcs.h | 1 | ||||
-rw-r--r-- | pl/math/tools/asinhf.sollya | 29 |
10 files changed, 228 insertions, 0 deletions
diff --git a/pl/math/asinhf_3u5.c b/pl/math/asinhf_3u5.c new file mode 100644 index 0000000..10f9f31 --- /dev/null +++ b/pl/math/asinhf_3u5.c @@ -0,0 +1,77 @@ +/* + * Single-precision asinh(x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define AbsMask (0x7fffffff) +#define SqrtFltMax (0x1.749e96p+10f) +#define Ln2 (0x1.62e4p-1f) +#define One (0x3f8) +#define ExpM12 (0x398) +#define QNaN (0x7fc) + +#define C(i) __asinhf_data.coeffs[i] + +float +optr_aor_log_f32 (float); + +/* asinhf approximation using a variety of approaches on different intervals: + + |x| < 2^-12: Return x. Function is exactly rounded in this region. + + |x| < 1.0: Use custom order-8 polynomial. The largest observed + error in this region is 1.3ulps: + asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1. + + |x| <= SqrtFltMax: Calculate the result directly using the + definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest + observed error in this region is 1.99ulps. + asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1. + + |x| > SqrtFltMax: We cannot square x without overflow at a low + cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot + even double x without overflow, so calculate this as ln(x) + + ln(2). This largest observed error in this region is 3.39ulps. + asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2. */ +float +asinhf (float x) +{ + uint32_t ix = asuint (x); + uint32_t ia = ix & AbsMask; + uint32_t ia12 = ia >> 20; + float ax = asfloat (ia); + uint32_t sign = ix & ~AbsMask; + + if (ia12 < ExpM12 || ia12 == QNaN) + { + return x; + } + + if (ia12 < One) + { + float x2 = ax * ax; + float x4 = x2 * x2; + + float p_01 = fmaf (ax, C (1), C (0)); + float p_23 = fmaf (ax, C (3), C (2)); + float p_45 = fmaf (ax, C (5), C (4)); + float p_67 = fmaf (ax, C (7), C (6)); + + float p_03 = fmaf (x2, p_23, p_01); + float p_47 = fmaf (x2, p_67, p_45); + + float p = fmaf (x4, p_47, p_03); + float y = fmaf (x2, p, ax); + return asfloat (asuint (y) | sign); + } + + if (unlikely (ax > SqrtFltMax)) + { + return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign); + } + + return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign); +} diff --git a/pl/math/asinhf_data.c b/pl/math/asinhf_data.c new file mode 100644 index 0000000..ce9b632 --- /dev/null +++ b/pl/math/asinhf_data.c @@ -0,0 +1,14 @@ +/* + * Coefficients for single-precision asinh(x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya for + these coeffs were generated. */ +const struct asinhf_data __asinhf_data + = {.coeffs + = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f, + 0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}}; diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index 0b7a745..c566d12 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -9,6 +9,7 @@ #ifndef _MATHLIB_H #define _MATHLIB_H +float asinhf (float); float atan2f (float, float); float erfcf (float); float erff (float); diff --git a/pl/math/logf.c b/pl/math/logf.c new file mode 100644 index 0000000..2962ee7 --- /dev/null +++ b/pl/math/logf.c @@ -0,0 +1,75 @@ +/* + * Single-precision log function. + * + * Copyright (c) 2017-2019, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include <math.h> +#include <stdint.h> +#include "math_config.h" + +/* +LOGF_TABLE_BITS = 4 +LOGF_POLY_ORDER = 4 + +ULP error: 0.818 (nearest rounding.) +Relative error: 1.957 * 2^-26 (before rounding.) +*/ + +#define T __logf_data.tab +#define A __logf_data.poly +#define Ln2 __logf_data.ln2 +#define N (1 << LOGF_TABLE_BITS) +#define OFF 0x3f330000 + +float +optr_aor_log_f32 (float x) +{ + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, y, y0, invc, logc; + uint32_t ix, iz, tmp; + int k, i; + + ix = asuint (x); +#if WANT_ROUNDING + /* Fix sign of zero with downward rounding when x==1. */ + if (unlikely (ix == 0x3f800000)) + return 0; +#endif + if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) + { + /* x < 0x1p-126 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzerof (1); + if (ix == 0x7f800000) /* log(inf) == inf. */ + return x; + if ((ix & 0x80000000) || ix * 2 >= 0xff000000) + return __math_invalidf (x); + /* x is subnormal, normalize it. */ + ix = asuint (x * 0x1p23f); + ix -= 23 << 23; + } + + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; + k = (int32_t) tmp >> 23; /* arithmetic shift */ + iz = ix - (tmp & 0x1ff << 23); + invc = T[i].invc; + logc = T[i].logc; + z = (double_t) asfloat (iz); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ + r = z * invc - 1; + y0 = logc + (double_t) k * Ln2; + + /* Pipelined polynomial evaluation to approximate log1p(r). */ + r2 = r * r; + y = A[1] * r + A[2]; + y = A[0] * r2 + y; + y = y * r2 + (y0 + r); + return eval_as_float (y); +} diff --git a/pl/math/math_config.h b/pl/math/math_config.h index 231d6bc..e77170e 100644 --- a/pl/math/math_config.h +++ b/pl/math/math_config.h @@ -419,4 +419,10 @@ extern const struct atanf_poly_data { float poly[ATANF_POLY_NCOEFFS]; } __atanf_poly_data HIDDEN; + +#define ASINHF_NCOEFFS 8 +extern const struct asinhf_data +{ + float coeffs[ASINHF_NCOEFFS]; +} __asinhf_data HIDDEN; #endif diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h index 0f8e0ca..45ee627 100644 --- a/pl/math/test/mathbench_funcs.h +++ b/pl/math/test/mathbench_funcs.h @@ -5,6 +5,7 @@ * Copyright (c) 2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +F (asinhf, -10.0, 10.0) F (atanf, -10.0, 10.0) {"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}}, F (erfcf, -4.0, 10.0) diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh index 17b13ae..746562d 100755 --- a/pl/math/test/runulp.sh +++ b/pl/math/test/runulp.sh @@ -85,6 +85,12 @@ t atan2f 0.0 1.0 40000 t atan2f 1.0 100.0 40000 t atan2f 1e6 1e32 40000 +L=3.0 +t asinhf 0 0x1p-12 5000 +t asinhf 0x1p-12 1.0 50000 +t asinhf 1.0 0x1p11 50000 +t asinhf 0x1p11 0x1p127 20000 + done # vector functions diff --git a/pl/math/test/testcases/directed/asinhf.tst b/pl/math/test/testcases/directed/asinhf.tst new file mode 100644 index 0000000..d832056 --- /dev/null +++ b/pl/math/test/testcases/directed/asinhf.tst @@ -0,0 +1,18 @@ +; asinhf.tst +; +; Copyright (c) 2007-2022, Arm Limited. +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=asinhf op1=7fc00001 result=7fc00001 errno=0 +func=asinhf op1=ffc00001 result=7fc00001 errno=0 +func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i +func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i +func=asinhf op1=7f800000 result=7f800000 errno=0 +func=asinhf op1=ff800000 result=ff800000 errno=0 +func=asinhf op1=00000000 result=00000000 errno=0 +func=asinhf op1=80000000 result=80000000 errno=0 +; No exception is raised on certain machines (different version of glibc) +; Same issue encountered with other function similar to x close to 0 +; Could be due to function so boring no flop is involved in some implementations +func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux +func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h index 40fae51..28ef25c 100644 --- a/pl/math/test/ulp_funcs.h +++ b/pl/math/test/ulp_funcs.h @@ -4,6 +4,7 @@ * Copyright (c) 2022, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +F1 (asinh) F2 (atan2) F1 (erfc) F1 (erf) diff --git a/pl/math/tools/asinhf.sollya b/pl/math/tools/asinhf.sollya new file mode 100644 index 0000000..cbe7d62 --- /dev/null +++ b/pl/math/tools/asinhf.sollya @@ -0,0 +1,29 @@ +// polynomial for approximating asinh(x) +// +// Copyright (c) 2022, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 9; + +a = 0x1.0p-12; +b = 1.0; + +f = proc(y) { + return asinh(x); +}; + +approx = proc(poly, d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +poly = x; +for i from 2 to deg do { + p = roundcoefficients(approx(poly,i), [|SG ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 2 to deg do coeff(poly,i); |