diff options
Diffstat (limited to 'math')
126 files changed, 1020 insertions, 475 deletions
diff --git a/math/Dir.mk b/math/Dir.mk index 3b841ab..2a9cad1 100644 --- a/math/Dir.mk +++ b/math/Dir.mk @@ -1,7 +1,7 @@ # Makefile fragment - requires GNU make # -# Copyright (c) 2019, Arm Limited. -# SPDX-License-Identifier: MIT +# Copyright (c) 2019-2022, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception S := $(srcdir)/math B := build/math @@ -15,6 +15,7 @@ math-test-srcs := \ math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS]) math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h)) +math-test-includes := $(patsubst $(S)/%,build/include/%,$(wildcard $(S)/test/*.h)) math-libs := \ build/lib/libmathlib.so \ @@ -42,10 +43,11 @@ math-files := \ $(math-tools) \ $(math-host-tools) \ $(math-includes) \ + $(math-test-includes) \ -all-math: $(math-libs) $(math-tools) $(math-includes) +all-math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes) -$(math-objs): $(math-includes) +$(math-objs): $(math-includes) $(math-test-includes) $(math-objs): CFLAGS_ALL += $(math-cflags) $(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno $(math-host-objs): CC = $(HOST_CC) @@ -83,6 +85,9 @@ build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a build/include/%.h: $(S)/include/%.h cp $< $@ +build/include/test/%.h: $(S)/test/%.h + cp $< $@ + build/bin/%.sh: $(S)/test/%.sh cp $< $@ @@ -96,7 +101,7 @@ check-math-rtest: $(math-host-tools) $(math-tools) cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags) check-math-ulp: $(math-tools) - ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR) + ULPFLAGS="$(math-ulpflags)" WANT_SIMD_EXCEPT="$(WANT_SIMD_EXCEPT)" build/bin/runulp.sh $(EMULATOR) check-math: check-math-test check-math-rtest check-math-ulp diff --git a/math/README.contributors b/math/README.contributors new file mode 100644 index 0000000..33e7ba3 --- /dev/null +++ b/math/README.contributors @@ -0,0 +1,78 @@ +STYLE REQUIREMENTS +================== + +1. 
Most code in this sub-directory is expected to be upstreamed into glibc so + the GNU Coding Standard and glibc specific conventions should be followed + to ease upstreaming. + +2. ABI and symbols: the code should be written so it is suitable for inclusion + into a libc with minimal changes. This e.g. means that internal symbols + should be hidden and in the implementation reserved namespace according to + ISO C and POSIX rules. If possible the built shared libraries and static + library archives should be usable to override libc symbols at link time (or + at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI + (other than symbol versioning), this cannot be done reliably for static + linking so this is a best effort requirement. + +3. API: include headers should be suitable for benchmarking and testing code + and should not conflict with libc headers. + + +CONTRIBUTION GUIDELINES FOR math SUB-DIRECTORY +============================================== + +1. Math functions have quality and performance requirements. + +2. Quality: + - Worst-case ULP error should be small in the entire input domain (for most + common double precision scalar functions the target is < 0.66 ULP error, + and < 1 ULP for single precision, even performance optimized function + variant should not have > 5 ULP error if the goal is to be a drop in + replacement for a standard math function), this should be tested + statistically (or on all inputs if possible in reasonable amount of time). + The ulp tool is for this and runulp.sh should be updated for new functions. + + - All standard rounding modes need to be supported but in non-default rounding + modes the quality requirement can be relaxed. (Non-nearest rounded + computation can be slow and inaccurate but has to be correct for conformance + reasons.) 
+ + - Special cases and error handling need to follow ISO C Annex F requirements, + POSIX requirements, IEEE 754-2008 requirements and Glibc requirements: + https://www.gnu.org/software/libc/manual/html_mono/libc.html#Errors-in-Math-Functions + this should be tested by direct tests (glibc test system may be used for it). + + - Error handling code should be decoupled from the approximation code as much + as possible. (There are helper functions, these take care of errno as well + as exception raising.) + + - Vector math code does not need to work in non-nearest rounding mode and error + handling side effects need not happen (fenv exceptions and errno), but the + result should be correct (within quality requirements, which are lower for + vector code than for scalar code). + + - Error bounds of the approximation should be clearly documented. + + - The code should build and pass tests on arm, aarch64 and x86_64 GNU linux + systems. (Routines and features can be disabled on specific targets, but + the build must complete). On aarch64, both little- and big-endian targets + are supported as well as valid combinations of architecture extensions. + The configurations that should be tested depend on the contribution. + +3. Performance: + - Common math code should be benchmarked on modern aarch64 microarchitectures + over typical inputs. + + - Performance improvements should be documented (relative numbers can be + published; it is enough to use the mathbench microbenchmark tool which should + be updated for new functions). + + - Attention should be paid to the compilation flags: for aarch64 fma + contraction should be on and math errno turned off so some builtins can be + inlined. + + - The code should be reasonably performant on x86_64 too, e.g. some rounding + instructions and fma may not be available on x86_64, such builtins turn into + libc calls with slow code. 
Such slowdown is not acceptable, a faster fallback + should be present: glibc and bionic use the same code on all targets. (This + does not apply to vector math code). diff --git a/math/cosf.c b/math/cosf.c index f29f194..6293ce8 100644 --- a/math/cosf.c +++ b/math/cosf.c @@ -1,8 +1,8 @@ /* * Single-precision cos function. * - * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -22,7 +22,7 @@ cosf (float y) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; @@ -2,7 +2,7 @@ * Double-precision erf(x) function. * * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/erf_data.c b/math/erf_data.c index 807875b..10cf1fa 100644 --- a/math/erf_data.c +++ b/math/erf_data.c @@ -2,7 +2,7 @@ * Shared data between erf and erfc. * * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/erff.c b/math/erff.c index a58e825..9fa476d 100644 --- a/math/erff.c +++ b/math/erff.c @@ -2,7 +2,7 @@ * Single-precision erf(x) function. * * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> diff --git a/math/erff_data.c b/math/erff_data.c index fa6b1ef..f822788 100644 --- a/math/erff_data.c +++ b/math/erff_data.c @@ -2,7 +2,7 @@ * Data for approximation of erff. * * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" @@ -2,7 +2,7 @@ * Double-precision e^x function. 
* * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/math/exp2.c b/math/exp2.c index 35ab39f..a1eee44 100644 --- a/math/exp2.c +++ b/math/exp2.c @@ -2,7 +2,7 @@ * Double-precision 2^x function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/math/exp2f.c b/math/exp2f.c index 94b3253..776c3dd 100644 --- a/math/exp2f.c +++ b/math/exp2f.c @@ -2,7 +2,7 @@ * Single-precision 2^x function. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/math/exp2f_data.c b/math/exp2f_data.c index 3fb0ad1..f0cb7fc 100644 --- a/math/exp2f_data.c +++ b/math/exp2f_data.c @@ -2,7 +2,7 @@ * Shared data between expf, exp2f and powf. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/exp_data.c b/math/exp_data.c index cba7683..714c845 100644 --- a/math/exp_data.c +++ b/math/exp_data.c @@ -2,7 +2,7 @@ * Shared data between exp, exp2 and pow. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/expf.c b/math/expf.c index 9b2f0c3..08a20d5 100644 --- a/math/expf.c +++ b/math/expf.c @@ -2,7 +2,7 @@ * Single-precision e^x function. * * Copyright (c) 2017-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/math/include/mathlib.h b/math/include/mathlib.h index 279d829..c520c37 100644 --- a/math/include/mathlib.h +++ b/math/include/mathlib.h @@ -2,7 +2,7 @@ * Public API. * * Copyright (c) 2015-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATHLIB_H @@ -2,7 +2,7 @@ * Double-precision log(x) function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/math/log2.c b/math/log2.c index 55102b7..3f9c21b 100644 --- a/math/log2.c +++ b/math/log2.c @@ -2,7 +2,7 @@ * Double-precision log2(x) function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/math/log2_data.c b/math/log2_data.c index 3fc9b47..293bd7d 100644 --- a/math/log2_data.c +++ b/math/log2_data.c @@ -2,7 +2,7 @@ * Data for log2. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/log2f.c b/math/log2f.c index acb629e..0a44fa2 100644 --- a/math/log2f.c +++ b/math/log2f.c @@ -2,7 +2,7 @@ * Single-precision log2 function. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/math/log2f_data.c b/math/log2f_data.c index f3546d7..4866ef7 100644 --- a/math/log2f_data.c +++ b/math/log2f_data.c @@ -2,7 +2,7 @@ * Data definition for log2f. * * Copyright (c) 2017-2018, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/log_data.c b/math/log_data.c index 96a098d..3ecc1f4 100644 --- a/math/log_data.c +++ b/math/log_data.c @@ -2,7 +2,7 @@ * Data for log. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/logf.c b/math/logf.c index cfbaee1..820f74c 100644 --- a/math/logf.c +++ b/math/logf.c @@ -1,8 +1,8 @@ /* * Single-precision log function. * - * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2017-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> @@ -57,7 +57,7 @@ logf (float x) tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t) tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0x1ff << 23); + iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); diff --git a/math/logf_data.c b/math/logf_data.c index e8973ce..0424768 100644 --- a/math/logf_data.c +++ b/math/logf_data.c @@ -2,7 +2,7 @@ * Data definition for logf. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/math_config.h b/math/math_config.h index e851043..7ffc0cd 100644 --- a/math/math_config.h +++ b/math/math_config.h @@ -2,7 +2,7 @@ * Configuration for math routines. * * Copyright (c) 2017-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATH_CONFIG_H diff --git a/math/math_err.c b/math/math_err.c index 1bf9538..cfe0728 100644 --- a/math/math_err.c +++ b/math/math_err.c @@ -2,7 +2,7 @@ * Double-precision math error handling. * * Copyright (c) 2018, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/math_errf.c b/math/math_errf.c index d5350b8..4233918 100644 --- a/math/math_errf.c +++ b/math/math_errf.c @@ -2,7 +2,7 @@ * Single-precision math error handling. * * Copyright (c) 2017-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" @@ -2,7 +2,7 @@ * Double-precision x^y function. * * Copyright (c) 2018-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/math/pow_log_data.c b/math/pow_log_data.c index 45569c5..2a4c250 100644 --- a/math/pow_log_data.c +++ b/math/pow_log_data.c @@ -2,7 +2,7 @@ * Data for the log part of pow. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/powf.c b/math/powf.c index 6ba45d3..05c80bb 100644 --- a/math/powf.c +++ b/math/powf.c @@ -2,7 +2,7 @@ * Single-precision pow function. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/math/powf_log2_data.c b/math/powf_log2_data.c index 97e0d98..243836a 100644 --- a/math/powf_log2_data.c +++ b/math/powf_log2_data.c @@ -2,7 +2,7 @@ * Data definition for powf. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/math/s_cos.c b/math/s_cos.c index 53a95b0..e66d563 100644 --- a/math/s_cos.c +++ b/math/s_cos.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_cos.c" diff --git a/math/s_cosf.c b/math/s_cosf.c index 914c02e..f615d26 100644 --- a/math/s_cosf.c +++ b/math/s_cosf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_cosf.c" diff --git a/math/s_exp.c b/math/s_exp.c index ac7246b..5da0099 100644 --- a/math/s_exp.c +++ b/math/s_exp.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_exp.c" diff --git a/math/s_exp2f.c b/math/s_exp2f.c index df7dfd6..dcbfea9 100644 --- a/math/s_exp2f.c +++ b/math/s_exp2f.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_exp2f.c" diff --git a/math/s_exp2f_1u.c b/math/s_exp2f_1u.c index 5e3852b..bf387e4 100644 --- a/math/s_exp2f_1u.c +++ b/math/s_exp2f_1u.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_exp2f_1u.c" diff --git a/math/s_expf.c b/math/s_expf.c index 3492c46..dacda7f 100644 --- a/math/s_expf.c +++ b/math/s_expf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_expf.c" diff --git a/math/s_expf_1u.c b/math/s_expf_1u.c index eb7bbcb..0009644 100644 --- a/math/s_expf_1u.c +++ b/math/s_expf_1u.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_expf_1u.c" diff --git a/math/s_log.c b/math/s_log.c index 23289cf..27d2eb2 100644 --- a/math/s_log.c +++ b/math/s_log.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_log.c" diff --git a/math/s_logf.c b/math/s_logf.c index 9399350..7d98b2b 100644 --- a/math/s_logf.c +++ b/math/s_logf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_logf.c" diff --git a/math/s_pow.c b/math/s_pow.c index 2e34c9f..6eca2b2 100644 --- a/math/s_pow.c +++ b/math/s_pow.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_pow.c" diff --git a/math/s_powf.c b/math/s_powf.c index 6d91a4a..1d55d90 100644 --- a/math/s_powf.c +++ b/math/s_powf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_powf.c" diff --git a/math/s_sin.c b/math/s_sin.c index 06982c2..0c61712 100644 --- a/math/s_sin.c +++ b/math/s_sin.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_sin.c" diff --git a/math/s_sinf.c b/math/s_sinf.c index 68ca908..3aae611 100644 --- a/math/s_sinf.c +++ b/math/s_sinf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #define SCALAR 1 #include "v_sinf.c" diff --git a/math/sincosf.c b/math/sincosf.c index 9746f1c..446f21d 100644 --- a/math/sincosf.c +++ b/math/sincosf.c @@ -1,8 +1,8 @@ /* * Single-precision sin/cos function. * - * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -22,7 +22,7 @@ sincosf (float y, float *sinp, float *cosp) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; diff --git a/math/sincosf.h b/math/sincosf.h index 1e80fc9..ec23ed7 100644 --- a/math/sincosf.h +++ b/math/sincosf.h @@ -1,8 +1,8 @@ /* * Header for sinf, cosf and sincosf. * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -12,7 +12,7 @@ /* 2PI * 2^-64. */ static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ -static const double pio4 = 0x1.921FB54442D18p-1; +static const float pio4f = 0x1.921FB6p-1f; /* The constants and polynomials for sine and cosine. */ typedef struct diff --git a/math/sincosf_data.c b/math/sincosf_data.c index ab4ac47..2252529 100644 --- a/math/sincosf_data.c +++ b/math/sincosf_data.c @@ -2,7 +2,7 @@ * Data definition for sinf, cosf and sincosf. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> diff --git a/math/sinf.c b/math/sinf.c index ddbc1da..8dd8ae4 100644 --- a/math/sinf.c +++ b/math/sinf.c @@ -1,8 +1,8 @@ /* * Single-precision sin function. * - * Copyright (c) 2018-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> @@ -21,7 +21,7 @@ sinf (float y) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { s = x * x; diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 0c17826..6e18e36 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -1,8 +1,8 @@ /* * Microbenchmark for math functions. * - * Copyright (c) 2018-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #undef _GNU_SOURCE @@ -66,6 +66,43 @@ v_float_dup (float x) { return (v_float){x, x, x, x}; } +#if WANT_SVE_MATH +#include <arm_sve.h> +typedef svbool_t sv_bool; +typedef svfloat64_t sv_double; + +#define sv_double_len() svcntd() + +static inline sv_double +sv_double_load (const double *p) +{ + svbool_t pg = svptrue_b64(); + return svld1(pg, p); +} + +static inline sv_double +sv_double_dup (double x) +{ + return svdup_n_f64(x); +} + +typedef svfloat32_t sv_float; + +#define sv_float_len() svcntw() + +static inline sv_float +sv_float_load (const float *p) +{ + svbool_t pg = svptrue_b32(); + return svld1(pg, p); +} + +static inline sv_float +sv_float_dup (float x) +{ + return svdup_n_f32(x); +} +#endif #else /* dummy definitions to make things compile. 
*/ typedef double v_double; @@ -89,7 +126,6 @@ dummyf (float x) { return x; } - #if WANT_VMATH #if __aarch64__ static v_double @@ -116,101 +152,25 @@ __vn_dummyf (v_float x) { return x; } - -__vpcs static v_float -xy__vn_powf (v_float x) -{ - return __vn_powf (x, x); -} - -__vpcs static v_float -xy_Z_powf (v_float x) +#endif +#if WANT_SVE_MATH +static sv_double +__sv_dummy (sv_double x, sv_bool pg) { - return _ZGVnN4vv_powf (x, x); + return x; } -__vpcs static v_double -xy__vn_pow (v_double x) +static sv_float +__sv_dummyf (sv_float x, sv_bool pg) { - return __vn_pow (x, x); + return x; } -__vpcs static v_double -xy_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (x, x); -} #endif - -static v_float -xy__v_powf (v_float x) -{ - return __v_powf (x, x); -} - -static v_double -xy__v_pow (v_double x) -{ - return __v_pow (x, x); -} #endif - -static float -xy__s_powf (float x) -{ - return __s_powf (x, x); -} - -static double -xy__s_pow (double x) -{ - return __s_pow (x, x); -} #endif -static double -xypow (double x) -{ - return pow (x, x); -} - -static float -xypowf (float x) -{ - return powf (x, x); -} - -static double -xpow (double x) -{ - return pow (x, 23.4); -} - -static float -xpowf (float x) -{ - return powf (x, 23.4f); -} - -static double -ypow (double x) -{ - return pow (2.34, x); -} - -static float -ypowf (float x) -{ - return powf (2.34f, x); -} - -static float -sincosf_wrap (float x) -{ - float s, c; - sincosf (x, &s, &c); - return s + c; -} +#include "test/mathbench_wrappers.h" static const struct fun { @@ -229,6 +189,10 @@ static const struct fun __vpcs v_double (*vnd) (v_double); __vpcs v_float (*vnf) (v_float); #endif +#if WANT_SVE_MATH + sv_double (*svd) (sv_double, sv_bool); + sv_float (*svf) (sv_float, sv_bool); +#endif } fun; } funtab[] = { #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, @@ -237,106 +201,25 @@ static const struct fun #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}}, #define VND(func, lo, hi) {#func, 'd', 'n', 
lo, hi, {.vnd = func}}, #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, +#define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, +#define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, D (dummy, 1.0, 2.0) -D (exp, -9.9, 9.9) -D (exp, 0.5, 1.0) -D (exp2, -9.9, 9.9) -D (log, 0.01, 11.1) -D (log, 0.999, 1.001) -D (log2, 0.01, 11.1) -D (log2, 0.999, 1.001) -{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, -D (xpow, 0.01, 11.1) -D (ypow, -9.9, 9.9) -D (erf, -6.0, 6.0) - F (dummyf, 1.0, 2.0) -F (expf, -9.9, 9.9) -F (exp2f, -9.9, 9.9) -F (logf, 0.01, 11.1) -F (log2f, 0.01, 11.1) -{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, -F (xpowf, 0.01, 11.1) -F (ypowf, -9.9, 9.9) -{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, -F (sinf, 0.1, 0.7) -F (sinf, 0.8, 3.1) -F (sinf, -3.1, 3.1) -F (sinf, 3.3, 33.3) -F (sinf, 100, 1000) -F (sinf, 1e6, 1e32) -F (cosf, 0.1, 0.7) -F (cosf, 0.8, 3.1) -F (cosf, -3.1, 3.1) -F (cosf, 3.3, 33.3) -F (cosf, 100, 1000) -F (cosf, 1e6, 1e32) -F (erff, -4.0, 4.0) #if WANT_VMATH -D (__s_sin, -3.1, 3.1) -D (__s_cos, -3.1, 3.1) -D (__s_exp, -9.9, 9.9) -D (__s_log, 0.01, 11.1) -{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, -F (__s_expf, -9.9, 9.9) -F (__s_expf_1u, -9.9, 9.9) -F (__s_exp2f, -9.9, 9.9) -F (__s_exp2f_1u, -9.9, 9.9) -F (__s_logf, 0.01, 11.1) -{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}}, -F (__s_sinf, -3.1, 3.1) -F (__s_cosf, -3.1, 3.1) #if __aarch64__ VD (__v_dummy, 1.0, 2.0) -VD (__v_sin, -3.1, 3.1) -VD (__v_cos, -3.1, 3.1) -VD (__v_exp, -9.9, 9.9) -VD (__v_log, 0.01, 11.1) -{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, VF (__v_dummyf, 1.0, 2.0) -VF (__v_expf, -9.9, 9.9) -VF (__v_expf_1u, -9.9, 9.9) -VF (__v_exp2f, 
-9.9, 9.9) -VF (__v_exp2f_1u, -9.9, 9.9) -VF (__v_logf, 0.01, 11.1) -{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, -VF (__v_sinf, -3.1, 3.1) -VF (__v_cosf, -3.1, 3.1) #ifdef __vpcs VND (__vn_dummy, 1.0, 2.0) -VND (__vn_exp, -9.9, 9.9) -VND (_ZGVnN2v_exp, -9.9, 9.9) -VND (__vn_log, 0.01, 11.1) -VND (_ZGVnN2v_log, 0.01, 11.1) -{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, -{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, -VND (__vn_sin, -3.1, 3.1) -VND (_ZGVnN2v_sin, -3.1, 3.1) -VND (__vn_cos, -3.1, 3.1) -VND (_ZGVnN2v_cos, -3.1, 3.1) VNF (__vn_dummyf, 1.0, 2.0) -VNF (__vn_expf, -9.9, 9.9) -VNF (_ZGVnN4v_expf, -9.9, 9.9) -VNF (__vn_expf_1u, -9.9, 9.9) -VNF (__vn_exp2f, -9.9, 9.9) -VNF (_ZGVnN4v_exp2f, -9.9, 9.9) -VNF (__vn_exp2f_1u, -9.9, 9.9) -VNF (__vn_logf, 0.01, 11.1) -VNF (_ZGVnN4v_logf, 0.01, 11.1) -{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, -{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, -VNF (__vn_sinf, -3.1, 3.1) -VNF (_ZGVnN4v_sinf, -3.1, 3.1) -VNF (__vn_cosf, -3.1, 3.1) -VNF (_ZGVnN4v_cosf, -3.1, 3.1) +#endif +#if WANT_SVE_MATH +SVD (__sv_dummy, 1.0, 2.0) +SVF (__sv_dummyf, 1.0, 2.0) #endif #endif #endif +#include "test/mathbench_funcs.h" {0}, #undef F #undef D @@ -344,6 +227,8 @@ VNF (_ZGVnN4v_cosf, -3.1, 3.1) #undef VD #undef VNF #undef VND +#undef SVF +#undef SVD }; static void @@ -508,6 +393,40 @@ runf_vn_latency (__vpcs v_float f (v_float)) } #endif +#if WANT_SVE_MATH +static void +run_sv_thruput (sv_double f (sv_double, sv_bool)) +{ + for (int i = 0; i < N; i += sv_double_len ()) + f (sv_double_load (A+i), svptrue_b64 ()); +} + +static void +runf_sv_thruput (sv_float f (sv_float, sv_bool)) +{ + for (int i = 0; i < N; i += sv_float_len ()) + f (sv_float_load (Af+i), svptrue_b32 ()); +} + +static void +run_sv_latency (sv_double f (sv_double, sv_bool)) +{ + sv_double z = sv_double_dup (zero); + sv_double prev = z; + for (int i = 0; i < N; i += sv_double_len ()) + prev = f (svmad_f64_x 
(svptrue_b64 (), prev, z, sv_double_load (A+i)), svptrue_b64 ()); +} + +static void +runf_sv_latency (sv_float f (sv_float, sv_bool)) +{ + sv_float z = sv_float_dup (zero); + sv_float prev = z; + for (int i = 0; i < N; i += sv_float_len ()) + prev = f (svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af+i)), svptrue_b32 ()); +} +#endif + static uint64_t tic (void) { @@ -570,6 +489,16 @@ bench1 (const struct fun *f, int type, double lo, double hi) else if (f->prec == 'f' && type == 'l' && f->vec == 'n') TIMEIT (runf_vn_latency, f->fun.vnf); #endif +#if WANT_SVE_MATH + else if (f->prec == 'd' && type == 't' && f->vec == 's') + TIMEIT (run_sv_thruput, f->fun.svd); + else if (f->prec == 'd' && type == 'l' && f->vec == 's') + TIMEIT (run_sv_latency, f->fun.svd); + else if (f->prec == 'f' && type == 't' && f->vec == 's') + TIMEIT (runf_sv_thruput, f->fun.svf); + else if (f->prec == 'f' && type == 'l' && f->vec == 's') + TIMEIT (runf_sv_latency, f->fun.svf); +#endif if (type == 't') { diff --git a/math/test/mathbench_funcs.h b/math/test/mathbench_funcs.h new file mode 100644 index 0000000..ad6dd2a --- /dev/null +++ b/math/test/mathbench_funcs.h @@ -0,0 +1,100 @@ +/* + * Function entries for mathbench. + * + * Copyright (c) 2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +D (exp, -9.9, 9.9) +D (exp, 0.5, 1.0) +D (exp2, -9.9, 9.9) +D (log, 0.01, 11.1) +D (log, 0.999, 1.001) +D (log2, 0.01, 11.1) +D (log2, 0.999, 1.001) +{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, +D (xpow, 0.01, 11.1) +D (ypow, -9.9, 9.9) +D (erf, -6.0, 6.0) + +F (expf, -9.9, 9.9) +F (exp2f, -9.9, 9.9) +F (logf, 0.01, 11.1) +F (log2f, 0.01, 11.1) +{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, +F (xpowf, 0.01, 11.1) +F (ypowf, -9.9, 9.9) +{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, +F (sinf, 0.1, 0.7) +F (sinf, 0.8, 3.1) +F (sinf, -3.1, 3.1) +F (sinf, 3.3, 33.3) +F (sinf, 100, 1000) +F (sinf, 1e6, 1e32) +F (cosf, 0.1, 0.7) +F (cosf, 0.8, 3.1) +F (cosf, -3.1, 3.1) +F (cosf, 3.3, 33.3) +F (cosf, 100, 1000) +F (cosf, 1e6, 1e32) +F (erff, -4.0, 4.0) +#if WANT_VMATH +D (__s_sin, -3.1, 3.1) +D (__s_cos, -3.1, 3.1) +D (__s_exp, -9.9, 9.9) +D (__s_log, 0.01, 11.1) +{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, +F (__s_expf, -9.9, 9.9) +F (__s_expf_1u, -9.9, 9.9) +F (__s_exp2f, -9.9, 9.9) +F (__s_exp2f_1u, -9.9, 9.9) +F (__s_logf, 0.01, 11.1) +{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}}, +F (__s_sinf, -3.1, 3.1) +F (__s_cosf, -3.1, 3.1) +#if __aarch64__ +VD (__v_sin, -3.1, 3.1) +VD (__v_cos, -3.1, 3.1) +VD (__v_exp, -9.9, 9.9) +VD (__v_log, 0.01, 11.1) +{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, +VF (__v_expf, -9.9, 9.9) +VF (__v_expf_1u, -9.9, 9.9) +VF (__v_exp2f, -9.9, 9.9) +VF (__v_exp2f_1u, -9.9, 9.9) +VF (__v_logf, 0.01, 11.1) +{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, +VF (__v_sinf, -3.1, 3.1) +VF (__v_cosf, -3.1, 3.1) +#ifdef __vpcs +VND (__vn_exp, -9.9, 9.9) +VND (_ZGVnN2v_exp, -9.9, 9.9) 
+VND (__vn_log, 0.01, 11.1) +VND (_ZGVnN2v_log, 0.01, 11.1) +{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, +{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, +VND (__vn_sin, -3.1, 3.1) +VND (_ZGVnN2v_sin, -3.1, 3.1) +VND (__vn_cos, -3.1, 3.1) +VND (_ZGVnN2v_cos, -3.1, 3.1) +VNF (__vn_expf, -9.9, 9.9) +VNF (_ZGVnN4v_expf, -9.9, 9.9) +VNF (__vn_expf_1u, -9.9, 9.9) +VNF (__vn_exp2f, -9.9, 9.9) +VNF (_ZGVnN4v_exp2f, -9.9, 9.9) +VNF (__vn_exp2f_1u, -9.9, 9.9) +VNF (__vn_logf, 0.01, 11.1) +VNF (_ZGVnN4v_logf, 0.01, 11.1) +{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, +{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, +VNF (__vn_sinf, -3.1, 3.1) +VNF (_ZGVnN4v_sinf, -3.1, 3.1) +VNF (__vn_cosf, -3.1, 3.1) +VNF (_ZGVnN4v_cosf, -3.1, 3.1) +#endif +#endif +#endif diff --git a/math/test/mathbench_wrappers.h b/math/test/mathbench_wrappers.h new file mode 100644 index 0000000..8311f0f --- /dev/null +++ b/math/test/mathbench_wrappers.h @@ -0,0 +1,104 @@ +/* + * Function wrappers for mathbench. + * + * Copyright (c) 2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#if WANT_VMATH +#if __aarch64__ + +#ifdef __vpcs +__vpcs static v_float +xy__vn_powf (v_float x) +{ + return __vn_powf (x, x); +} + +__vpcs static v_float +xy_Z_powf (v_float x) +{ + return _ZGVnN4vv_powf (x, x); +} + +__vpcs static v_double +xy__vn_pow (v_double x) +{ + return __vn_pow (x, x); +} + +__vpcs static v_double +xy_Z_pow (v_double x) +{ + return _ZGVnN2vv_pow (x, x); +} +#endif // __vpcs + +static v_float +xy__v_powf (v_float x) +{ + return __v_powf (x, x); +} + +static v_double +xy__v_pow (v_double x) +{ + return __v_pow (x, x); +} +#endif // __aarch64__ + +static float +xy__s_powf (float x) +{ + return __s_powf (x, x); +} + +static double +xy__s_pow (double x) +{ + return __s_pow (x, x); +} +#endif // WANT_VMATH + +static double +xypow (double x) +{ + return pow (x, x); +} + +static float +xypowf (float x) +{ + return powf (x, x); +} + +static double +xpow (double x) +{ + return pow (x, 23.4); +} + +static float +xpowf (float x) +{ + return powf (x, 23.4f); +} + +static double +ypow (double x) +{ + return pow (2.34, x); +} + +static float +ypowf (float x) +{ + return powf (2.34f, x); +} + +static float +sincosf_wrap (float x) +{ + float s, c; + sincosf (x, &s, &c); + return s + c; +} diff --git a/math/test/mathtest.c b/math/test/mathtest.c index 3108967..3168da4 100644 --- a/math/test/mathtest.c +++ b/math/test/mathtest.c @@ -1,8 +1,8 @@ /* * mathtest.c - test rig for mathlib * - * Copyright (c) 1998-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 1998-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> @@ -196,9 +196,11 @@ int is_complex_rettype(int rettype) { #define TFUNCARM(arg,ret,name,tolerance) { t_func, arg, ret, (void*)& ARM_PREFIX(name), m_none, tolerance, #name } #define MFUNC(arg,ret,name,tolerance) { t_macro, arg, ret, NULL, m_##name, tolerance, #name } +#ifndef PL /* sincosf wrappers for easier testing. */ static float sincosf_sinf(float x) { float s,c; sincosf(x, &s, &c); return s; } static float sincosf_cosf(float x) { float s,c; sincosf(x, &s, &c); return c; } +#endif test_func tfuncs[] = { /* trigonometric */ @@ -218,9 +220,10 @@ test_func tfuncs[] = { TFUNCARM(at_s,rt_s, tanf, 4*ULPUNIT), TFUNCARM(at_s,rt_s, sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, cosf, 3*ULPUNIT/4), +#ifndef PL TFUNCARM(at_s,rt_s, sincosf_sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, sincosf_cosf, 3*ULPUNIT/4), - +#endif /* hyperbolic */ TFUNC(at_d, rt_d, atanh, 4*ULPUNIT), TFUNC(at_d, rt_d, asinh, 4*ULPUNIT), diff --git a/math/test/rtest/dotest.c b/math/test/rtest/dotest.c index 6be79e1..5b3e9b4 100644 --- a/math/test/rtest/dotest.c +++ b/math/test/rtest/dotest.c @@ -2,7 +2,7 @@ * dotest.c - actually generate mathlib test cases * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdio.h> diff --git a/math/test/rtest/intern.h b/math/test/rtest/intern.h index 12a9c74..3ebd7dd 100644 --- a/math/test/rtest/intern.h +++ b/math/test/rtest/intern.h @@ -2,7 +2,7 @@ * intern.h * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef mathtest_intern_h diff --git a/math/test/rtest/main.c b/math/test/rtest/main.c index 0d8ead8..3d533c9 100644 --- a/math/test/rtest/main.c +++ b/math/test/rtest/main.c @@ -2,7 +2,7 @@ * main.c * * Copyright (c) 1999-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> diff --git a/math/test/rtest/random.c b/math/test/rtest/random.c index 5612396..1de3258 100644 --- a/math/test/rtest/random.c +++ b/math/test/rtest/random.c @@ -2,7 +2,7 @@ * random.c - random number generator for producing mathlib test cases * * Copyright (c) 1998-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "types.h" diff --git a/math/test/rtest/random.h b/math/test/rtest/random.h index b4b22df..0b477d7 100644 --- a/math/test/rtest/random.h +++ b/math/test/rtest/random.h @@ -2,7 +2,7 @@ * random.h - header for random.c * * Copyright (c) 2009-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "types.h" diff --git a/math/test/rtest/semi.c b/math/test/rtest/semi.c index c9f0daf..70a7844 100644 --- a/math/test/rtest/semi.c +++ b/math/test/rtest/semi.c @@ -2,7 +2,7 @@ * semi.c: test implementations of mathlib seminumerical functions * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdio.h> diff --git a/math/test/rtest/semi.h b/math/test/rtest/semi.h index 17dc415..7a1444e 100644 --- a/math/test/rtest/semi.h +++ b/math/test/rtest/semi.h @@ -2,7 +2,7 @@ * semi.h: header for semi.c * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef test_semi_h diff --git a/math/test/rtest/types.h b/math/test/rtest/types.h index 53cd557..e15b4e0 100644 --- a/math/test/rtest/types.h +++ b/math/test/rtest/types.h @@ -2,7 +2,7 @@ * types.h * * Copyright (c) 2005-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef mathtest_types_h diff --git a/math/test/rtest/wrappers.c b/math/test/rtest/wrappers.c index de45ac5..4410171 100644 --- a/math/test/rtest/wrappers.c +++ b/math/test/rtest/wrappers.c @@ -2,7 +2,7 @@ * wrappers.c - wrappers to modify output of MPFR/MPC test functions * * Copyright (c) 2014-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> diff --git a/math/test/rtest/wrappers.h b/math/test/rtest/wrappers.h index 7b09c85..0a8a587 100644 --- a/math/test/rtest/wrappers.h +++ b/math/test/rtest/wrappers.h @@ -2,7 +2,7 @@ * wrappers.h - wrappers to modify output of MPFR/MPC test functions * * Copyright (c) 2014-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ typedef struct { diff --git a/math/test/runulp.sh b/math/test/runulp.sh index 0190d9a..b4000f6 100755 --- a/math/test/runulp.sh +++ b/math/test/runulp.sh @@ -2,8 +2,8 @@ # ULP error check script. # -# Copyright (c) 2019-2020, Arm Limited. -# SPDX-License-Identifier: MIT +# Copyright (c) 2019-2022, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception #set -x set -eu @@ -145,7 +145,7 @@ done # vector functions Ldir=0.5 r='n' -flags="${ULPFLAGS:--q} -f" +flags="${ULPFLAGS:--q}" runs= check __s_exp 1 && runs=1 runv= @@ -229,7 +229,7 @@ L_sinf=1.4 L_cosf=1.4 L_powf=2.1 -while read G F R +while read G F R D do [ "$R" = 1 ] || continue case "$G" in \#*) continue ;; esac @@ -239,7 +239,16 @@ do do [ -n "$X" ] || continue case "$X" in \#*) continue ;; esac - t $F $X + disable_fenv="" + if [ -z "$WANT_SIMD_EXCEPT" ] || [ $WANT_SIMD_EXCEPT -eq 0 ]; then + # If library was built with SIMD exceptions + # disabled, disable fenv checking in ulp + # tool. 
Otherwise, fenv checking may still be + # disabled by adding -f to the end of the run + # line. + disable_fenv="-f" + fi + t $D $disable_fenv $F $X done << EOF $range EOF @@ -255,10 +264,10 @@ log __v_log $runv log __vn_log $runvn log _ZGVnN2v_log $runvn -pow __s_pow $runs -pow __v_pow $runv -pow __vn_pow $runvn -pow _ZGVnN2vv_pow $runvn +pow __s_pow $runs -f +pow __v_pow $runv -f +pow __vn_pow $runvn -f +pow _ZGVnN2vv_pow $runvn -f sin __s_sin $runs sin __v_sin $runv @@ -275,18 +284,18 @@ expf __v_expf $runv expf __vn_expf $runvn expf _ZGVnN4v_expf $runvn -expf_1u __s_expf_1u $runs -expf_1u __v_expf_1u $runv -expf_1u __vn_expf_1u $runvn +expf_1u __s_expf_1u $runs -f +expf_1u __v_expf_1u $runv -f +expf_1u __vn_expf_1u $runvn -f exp2f __s_exp2f $runs exp2f __v_exp2f $runv exp2f __vn_exp2f $runvn exp2f _ZGVnN4v_exp2f $runvn -exp2f_1u __s_exp2f_1u $runs -exp2f_1u __v_exp2f_1u $runv -exp2f_1u __vn_exp2f_1u $runvn +exp2f_1u __s_exp2f_1u $runs -f +exp2f_1u __v_exp2f_1u $runv -f +exp2f_1u __vn_exp2f_1u $runvn -f logf __s_logf $runs logf __v_logf $runv @@ -303,10 +312,10 @@ cosf __v_cosf $runv cosf __vn_cosf $runvn cosf _ZGVnN4v_cosf $runvn -powf __s_powf $runs -powf __v_powf $runv -powf __vn_powf $runvn -powf _ZGVnN4vv_powf $runvn +powf __s_powf $runs -f +powf __v_powf $runv -f +powf __vn_powf $runvn -f +powf _ZGVnN4vv_powf $runvn -f EOF [ 0 -eq $FAIL ] || { diff --git a/math/test/testcases/directed/cosf.tst b/math/test/testcases/directed/cosf.tst index 7916044..7ea0d45 100644 --- a/math/test/testcases/directed/cosf.tst +++ b/math/test/testcases/directed/cosf.tst @@ -1,7 +1,7 @@ ; cosf.tst - Directed test cases for SP cosine ; ; Copyright (c) 2007-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=cosf op1=7fc00001 result=7fc00001 errno=0 func=cosf op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/erf.tst b/math/test/testcases/directed/erf.tst index 7fa4d18..12384ce 100644 --- a/math/test/testcases/directed/erf.tst +++ b/math/test/testcases/directed/erf.tst @@ -1,7 +1,7 @@ ; erf.tst - Directed test cases for erf ; ; Copyright (c) 2007-2020, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/math/test/testcases/directed/erff.tst b/math/test/testcases/directed/erff.tst index d05b7b1..28f8fa3 100644 --- a/math/test/testcases/directed/erff.tst +++ b/math/test/testcases/directed/erff.tst @@ -1,7 +1,7 @@ ; erff.tst ; ; Copyright (c) 2007-2020, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erff op1=7fc00001 result=7fc00001 errno=0 func=erff op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/exp.tst b/math/test/testcases/directed/exp.tst index 85d556c..0bb2ef4 100644 --- a/math/test/testcases/directed/exp.tst +++ b/math/test/testcases/directed/exp.tst @@ -1,7 +1,7 @@ ; Directed test cases for exp ; ; Copyright (c) 2018-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/math/test/testcases/directed/exp2.tst b/math/test/testcases/directed/exp2.tst index fa56c9f..7069f90 100644 --- a/math/test/testcases/directed/exp2.tst +++ b/math/test/testcases/directed/exp2.tst @@ -1,7 +1,7 @@ ; Directed test cases for exp2 ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/math/test/testcases/directed/exp2f.tst b/math/test/testcases/directed/exp2f.tst index 38cfc3f..6ca2eea 100644 --- a/math/test/testcases/directed/exp2f.tst +++ b/math/test/testcases/directed/exp2f.tst @@ -1,7 +1,7 @@ ; exp2f.tst - Directed test cases for exp2f ; ; Copyright (c) 2017-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp2f op1=7fc00001 result=7fc00001 errno=0 func=exp2f op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/expf.tst b/math/test/testcases/directed/expf.tst index ff0f671..89ae8fe 100644 --- a/math/test/testcases/directed/expf.tst +++ b/math/test/testcases/directed/expf.tst @@ -1,7 +1,7 @@ ; expf.tst - Directed test cases for expf ; ; Copyright (c) 2007-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=expf op1=7fc00001 result=7fc00001 errno=0 func=expf op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/log.tst b/math/test/testcases/directed/log.tst index a0aa398..686ea83 100644 --- a/math/test/testcases/directed/log.tst +++ b/math/test/testcases/directed/log.tst @@ -1,7 +1,7 @@ ; Directed test cases for log ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/math/test/testcases/directed/log2.tst b/math/test/testcases/directed/log2.tst index ff1286c..361bdde 100644 --- a/math/test/testcases/directed/log2.tst +++ b/math/test/testcases/directed/log2.tst @@ -1,7 +1,7 @@ ; Directed test cases for log2 ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/math/test/testcases/directed/log2f.tst b/math/test/testcases/directed/log2f.tst index 5832c4f..5fce051 100644 --- a/math/test/testcases/directed/log2f.tst +++ b/math/test/testcases/directed/log2f.tst @@ -1,7 +1,7 @@ ; log2f.tst - Directed test cases for log2f ; ; Copyright (c) 2017-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log2f op1=7fc00001 result=7fc00001 errno=0 func=log2f op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/logf.tst b/math/test/testcases/directed/logf.tst index 6e68a36..a6d1b9d 100644 --- a/math/test/testcases/directed/logf.tst +++ b/math/test/testcases/directed/logf.tst @@ -1,7 +1,7 @@ ; logf.tst - Directed test cases for logf ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=logf op1=7fc00001 result=7fc00001 errno=0 func=logf op1=ffc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/pow.tst b/math/test/testcases/directed/pow.tst index 1966581..879d128 100644 --- a/math/test/testcases/directed/pow.tst +++ b/math/test/testcases/directed/pow.tst @@ -1,7 +1,7 @@ ; Directed test cases for pow ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=pow op1=00000000.00000000 op2=00000000.00000000 result=3ff00000.00000000 errno=0 func=pow op1=00000000.00000000 op2=00000000.00000001 result=00000000.00000000 errno=0 diff --git a/math/test/testcases/directed/powf.tst b/math/test/testcases/directed/powf.tst index 3fa8b11..46d5224 100644 --- a/math/test/testcases/directed/powf.tst +++ b/math/test/testcases/directed/powf.tst @@ -1,7 +1,7 @@ ; powf.tst - Directed test cases for powf ; ; Copyright (c) 2007-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i diff --git a/math/test/testcases/directed/sincosf.tst b/math/test/testcases/directed/sincosf.tst index 4b33d22..cddb346 100644 --- a/math/test/testcases/directed/sincosf.tst +++ b/math/test/testcases/directed/sincosf.tst @@ -1,7 +1,7 @@ ; Directed test cases for SP sincos ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/directed/sinf.tst b/math/test/testcases/directed/sinf.tst index ded80b1..041b13d 100644 --- a/math/test/testcases/directed/sinf.tst +++ b/math/test/testcases/directed/sinf.tst @@ -1,7 +1,7 @@ ; sinf.tst - Directed test cases for SP sine ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sinf op1=7fc00001 result=7fc00001 errno=0 diff --git a/math/test/testcases/random/double.tst b/math/test/testcases/random/double.tst index c24ff80..8e885d6 100644 --- a/math/test/testcases/random/double.tst +++ b/math/test/testcases/random/double.tst @@ -1,7 +1,7 @@ !! double.tst - Random test case specification for DP functions !! !! Copyright (c) 1999-2019, Arm Limited. -!! SPDX-License-Identifier: MIT +!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception test exp 10000 test exp2 10000 diff --git a/math/test/testcases/random/float.tst b/math/test/testcases/random/float.tst index d02a227..ea4a5a0 100644 --- a/math/test/testcases/random/float.tst +++ b/math/test/testcases/random/float.tst @@ -1,7 +1,7 @@ !! single.tst - Random test case specification for SP functions !! !! Copyright (c) 1999-2019, Arm Limited. -!! SPDX-License-Identifier: MIT +!! 
SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception test sinf 10000 test cosf 10000 diff --git a/math/test/ulp.c b/math/test/ulp.c index 51479b8..bb8c3ad 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -1,8 +1,8 @@ /* * ULP error checking tool for math functions. * - * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <ctype.h> @@ -214,16 +214,6 @@ struct conf double errlim; }; -/* Wrappers for sincos. */ -static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);} -static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);} -static double sincos_sin(double x) {(void)cos(x); return sin(x);} -static double sincos_cos(double x) {(void)sin(x); return cos(x);} -#if USE_MPFR -static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); } -static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); } -#endif - /* A bit of a hack: call vector functions twice with the same input in lane 0 but a different value in other lanes: once with an in-range value and then with a special case value. */ @@ -233,50 +223,79 @@ static int secondcall; #if __aarch64__ && WANT_VMATH typedef __f32x4_t v_float; typedef __f64x2_t v_double; -static const float fv[2] = {1.0f, -INFINITY}; -static const double dv[2] = {1.0, -INFINITY}; +/* First element of fv and dv may be changed by -c argument. 
*/ +static float fv[2] = {1.0f, -INFINITY}; +static double dv[2] = {1.0, -INFINITY}; static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; } static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; } +#if WANT_SVE_MATH +#include <arm_sve.h> +typedef __SVFloat32_t sv_float; +typedef __SVFloat64_t sv_double; + +static inline sv_float svargf(float x) { + int n = svcntw(); + float base[n]; + for (int i=0; i<n; i++) + base[i] = (float)x; + base[n-1] = (float) fv[secondcall]; + return svld1(svptrue_b32(), base); +} +static inline sv_double svargd(double x) { + int n = svcntd(); + double base[n]; + for (int i=0; i<n; i++) + base[i] = x; + base[n-1] = dv[secondcall]; + return svld1(svptrue_b64(), base); +} +static inline float svretf(sv_float vec) { + int n = svcntw(); + float res[n]; + svst1(svptrue_b32(), res, vec); + return res[0]; +} +static inline double svretd(sv_double vec) { + int n = svcntd(); + double res[n]; + svst1(svptrue_b64(), res, vec); + return res[0]; +} +#endif +#endif + +#if WANT_SVE_MATH +long double +dummyl (long double x) +{ + return x; +} + +double +dummy (double x) +{ + return x; +} + +static sv_double +__sv_dummy (sv_double x) +{ + return x; +} -static float v_sinf(float x) { return __v_sinf(argf(x))[0]; } -static float v_cosf(float x) { return __v_cosf(argf(x))[0]; } -static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; } -static float v_expf(float x) { return __v_expf(argf(x))[0]; } -static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; } -static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; } -static float v_logf(float x) { return __v_logf(argf(x))[0]; } -static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; } -static double v_sin(double x) { return __v_sin(argd(x))[0]; } -static double v_cos(double x) { return __v_cos(argd(x))[0]; } -static double v_exp(double x) { return __v_exp(argd(x))[0]; } -static double v_log(double x) { return 
__v_log(argd(x))[0]; } -static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } -#ifdef __vpcs -static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } -static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } -static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; } -static float vn_expf(float x) { return __vn_expf(argf(x))[0]; } -static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; } -static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; } -static float vn_logf(float x) { return __vn_logf(argf(x))[0]; } -static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; } -static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } -static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } -static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } -static double vn_log(double x) { return __vn_log(argd(x))[0]; } -static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } -static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } -static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } -static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } -static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } -static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } -static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } -static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } -static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } -static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } -static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } -static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } +static sv_float +__sv_dummyf (sv_float x) +{ + return x; +} #endif + +#include "test/ulp_wrappers.h" + +/* Wrappers for SVE functions. 
*/ +#if WANT_SVE_MATH +static double sv_dummy (double x) { return svretd (__sv_dummy (svargd (x))); } +static float sv_dummyf (float x) { return svretf (__sv_dummyf (svargf (x))); } #endif struct fun @@ -322,83 +341,53 @@ static const struct fun fun[] = { #define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0) #define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0) #define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0) - F1 (sin) - F1 (cos) - F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0) - F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0) - F1 (exp) - F1 (exp2) - F1 (log) - F1 (log2) - F2 (pow) - F1 (erf) - D1 (exp) - D1 (exp2) - D1 (log) - D1 (log2) - D2 (pow) - D1 (erf) -#if WANT_VMATH - F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0) - F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0) - F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0) - F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0) - F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0) - F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0) - F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0) - F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0) - F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0) - F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) - F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) - F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) - F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) -#if __aarch64__ - F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) - F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1) - F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F 
(__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) - F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) -#ifdef __vpcs - F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) - F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1) - F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) - F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) - F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) - F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) - F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) -#endif -#endif +/* Neon routines. 
*/ +#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0) +#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0) +#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0) +#define ZVNF1(x) VNF1 (x) ZVF1 (x) +#define ZVNF2(x) VNF2 (x) ZVF2 (x) +#define ZVND1(x) VND1 (x) ZVD1 (x) +#define ZVND2(x) VND2 (x) ZVD2 (x) +#define SF1(x) F (__s_##x##f, __s_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define SF2(x) F (__s_##x##f, __s_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define SD1(x) F (__s_##x, __s_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define SD2(x) F (__s_##x, __s_##x, x##l, mpfr_##x, 2, 0, d2, 0) +/* SVE routines. 
*/ +#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0) +#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0) +#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0) +#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0) +#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0) + +#include "test/ulp_funcs.h" + +#if WANT_SVE_MATH + SVD1 (dummy) + SVF1 (dummy) #endif + #undef F #undef F1 #undef F2 #undef D1 #undef D2 +#undef SVF1 +#undef SVF2 +#undef SVD1 +#undef SVD2 {0}}; /* Boilerplate for generic calls. */ @@ -645,6 +634,11 @@ usage (void) puts ("-q: quiet."); puts ("-m: use mpfr even if faster method is available."); puts ("-f: disable fenv testing (rounding modes and exceptions)."); +#if __aarch64__ && WANT_VMATH + puts ("-c: neutral 'control value' to test behaviour when one lane can affect another. \n" + " This should be different from tested input in other lanes, and non-special \n" + " (i.e. should not trigger fenv exceptions). Default is 1."); +#endif puts ("Supported func:"); for (const struct fun *f = fun; f->name; f++) printf ("\t%s\n", f->name); @@ -812,6 +806,14 @@ main (int argc, char *argv[]) conf.rc = argv[0][0]; } break; +#if __aarch64__ && WANT_VMATH + case 'c': + argc--; + argv++; + fv[0] = strtof(argv[0], 0); + dv[0] = strtod(argv[0], 0); + break; +#endif default: usage (); } diff --git a/math/test/ulp.h b/math/test/ulp.h index a0c3016..327b4bd 100644 --- a/math/test/ulp.h +++ b/math/test/ulp.h @@ -2,7 +2,7 @@ * Generic functions for ULP error estimation. * * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* For each different math function type, diff --git a/math/test/ulp_funcs.h b/math/test/ulp_funcs.h new file mode 100644 index 0000000..f5cea4d --- /dev/null +++ b/math/test/ulp_funcs.h @@ -0,0 +1,78 @@ +/* + * Function entries for ulp. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + F1 (sin) + F1 (cos) + F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0) + F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0) + F1 (exp) + F1 (exp2) + F1 (log) + F1 (log2) + F2 (pow) + F1 (erf) + D1 (exp) + D1 (exp2) + D1 (log) + D1 (log2) + D2 (pow) + D1 (erf) +#if WANT_VMATH + F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0) + F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0) + F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0) + F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0) + F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0) + F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0) + F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0) + F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0) + F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0) + F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) + F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) + F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) + F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) +#if __aarch64__ + F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) + F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) + F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) + F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1) + F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (__v_cos, v_cos, 
cosl, mpfr_cos, 1, 0, d1, 1) + F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) + F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) + F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) +#ifdef __vpcs + F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) + F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) + F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) + F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1) + F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) + F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) + F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) + F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) + F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) + F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) + F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) + F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) + F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) + F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) + F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) + F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) +#endif +#endif +#endif diff --git a/math/test/ulp_wrappers.h b/math/test/ulp_wrappers.h new file mode 100644 index 0000000..fd9e00c --- /dev/null +++ b/math/test/ulp_wrappers.h @@ -0,0 +1,59 @@ +/* + * Function wrappers for ulp. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +/* Wrappers for sincos. 
*/ +static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);} +static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);} +static double sincos_sin(double x) {(void)cos(x); return sin(x);} +static double sincos_cos(double x) {(void)sin(x); return cos(x);} +#if USE_MPFR +static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); } +static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); } +#endif + +/* Wrappers for vector functions. */ +#if __aarch64__ && WANT_VMATH +static float v_sinf(float x) { return __v_sinf(argf(x))[0]; } +static float v_cosf(float x) { return __v_cosf(argf(x))[0]; } +static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; } +static float v_expf(float x) { return __v_expf(argf(x))[0]; } +static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; } +static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; } +static float v_logf(float x) { return __v_logf(argf(x))[0]; } +static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; } +static double v_sin(double x) { return __v_sin(argd(x))[0]; } +static double v_cos(double x) { return __v_cos(argd(x))[0]; } +static double v_exp(double x) { return __v_exp(argd(x))[0]; } +static double v_log(double x) { return __v_log(argd(x))[0]; } +static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } +#ifdef __vpcs +static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } +static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } +static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; } +static float vn_expf(float x) { return __vn_expf(argf(x))[0]; } +static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; } +static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; } +static float vn_logf(float x) { return __vn_logf(argf(x))[0]; } +static float vn_powf(float x, float y) { return 
__vn_powf(argf(x),argf(y))[0]; } +static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } +static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } +static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } +static double vn_log(double x) { return __vn_log(argd(x))[0]; } +static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } +static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } +static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } +static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } +static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } +static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } +static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } +static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } +static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } +static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } +static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } +static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } +#endif +#endif diff --git a/math/tools/cos.sollya b/math/tools/cos.sollya index bd72d6b..6690adf 100644 --- a/math/tools/cos.sollya +++ b/math/tools/cos.sollya @@ -1,7 +1,7 @@ // polynomial for approximating cos(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 8; // polynomial degree a = -pi/4; // interval diff --git a/math/tools/exp.sollya b/math/tools/exp.sollya index b7a462c..0668bdb 100644 --- a/math/tools/exp.sollya +++ b/math/tools/exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. 
-// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 5; // poly degree N = 128; // table entries diff --git a/math/tools/exp2.sollya b/math/tools/exp2.sollya index e760769..bd0a42d 100644 --- a/math/tools/exp2.sollya +++ b/math/tools/exp2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating 2^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // exp2f parameters deg = 3; // poly degree diff --git a/math/tools/log.sollya b/math/tools/log.sollya index 6df4db4..5288f55 100644 --- a/math/tools/log.sollya +++ b/math/tools/log.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 12; // poly degree // |log(1+x)| > 0x1p-4 outside the interval diff --git a/math/tools/log2.sollya b/math/tools/log2.sollya index 4a364c0..85811be 100644 --- a/math/tools/log2.sollya +++ b/math/tools/log2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 11; // poly degree // |log2(1+x)| > 0x1p-4 outside the interval diff --git a/math/tools/log2_abs.sollya b/math/tools/log2_abs.sollya index 82c4dac..d018ba0 100644 --- a/math/tools/log2_abs.sollya +++ b/math/tools/log2_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. 
-// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/math/tools/log_abs.sollya b/math/tools/log_abs.sollya index a2ac190..5f9bfe4 100644 --- a/math/tools/log_abs.sollya +++ b/math/tools/log_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/math/tools/plot.py b/math/tools/plot.py index 6c8b89f..a0fa023 100755 --- a/math/tools/plot.py +++ b/math/tools/plot.py @@ -3,7 +3,7 @@ # ULP error plot tool. # # Copyright (c) 2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import numpy as np import matplotlib.pyplot as plt diff --git a/math/tools/remez.jl b/math/tools/remez.jl index 2ff436f..1deab67 100755 --- a/math/tools/remez.jl +++ b/math/tools/remez.jl @@ -4,7 +4,7 @@ # remez.jl - implementation of the Remez algorithm for polynomial approximation # # Copyright (c) 2015-2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import Base.\ diff --git a/math/tools/sin.sollya b/math/tools/sin.sollya index a6e8511..a193000 100644 --- a/math/tools/sin.sollya +++ b/math/tools/sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // polynomial degree a = -pi/4; // interval diff --git a/math/tools/v_exp.sollya b/math/tools/v_exp.sollya index c0abb63..5fa7de7 100644 --- a/math/tools/v_exp.sollya +++ b/math/tools/v_exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. 
-// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 4; // poly degree N = 128; // table entries diff --git a/math/tools/v_log.sollya b/math/tools/v_log.sollya index cc3d2c4..d982524 100644 --- a/math/tools/v_log.sollya +++ b/math/tools/v_log.sollya @@ -1,7 +1,7 @@ // polynomial used for __v_log(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree a = -0x1.fc1p-9; diff --git a/math/tools/v_sin.sollya b/math/tools/v_sin.sollya index 65cc995..63b9d65 100644 --- a/math/tools/v_sin.sollya +++ b/math/tools/v_sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 15; // polynomial degree a = -pi/2; // interval diff --git a/math/v_cos.c b/math/v_cos.c index 20ba6bd..4c8787e 100644 --- a/math/v_cos.c +++ b/math/v_cos.c @@ -1,8 +1,8 @@ /* * Double-precision vector cos function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -55,6 +55,14 @@ V_NAME(cos) (v_f64_t x) r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); +#if WANT_SIMD_EXCEPT + if (unlikely (v_any_u64 (cmp))) + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + specialcase later. */ + r = v_sel_f64 (cmp, v_f64 (1.0), r); +#endif + /* n = rint((|x|+pi/2)/pi) - 0.5. 
*/ n = v_fma_f64 (InvPi, r + HalfPi, Shift); odd = v_as_u64_f64 (n) << 63; diff --git a/math/v_cosf.c b/math/v_cosf.c index 150294b..bd677c3 100644 --- a/math/v_cosf.c +++ b/math/v_cosf.c @@ -1,8 +1,8 @@ /* * Single-precision vector cos function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -47,6 +47,14 @@ V_NAME(cosf) (v_f32_t x) r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); +#if WANT_SIMD_EXCEPT + if (unlikely (v_any_u32 (cmp))) + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + specialcase later. */ + r = v_sel_f32 (cmp, v_f32 (1.0f), r); +#endif + /* n = rint((|x|+pi/2)/pi) - 0.5 */ n = v_fma_f32 (InvPi, r + HalfPi, Shift); odd = v_as_u32_f32 (n) << 31; diff --git a/math/v_exp.c b/math/v_exp.c index e459d53..da23fd1 100644 --- a/math/v_exp.c +++ b/math/v_exp.c @@ -1,8 +1,8 @@ /* * Double-precision vector e^x function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -36,6 +36,22 @@ #define Tab __v_exp_data #define IndexMask v_u64 (N - 1) #define Shift v_f64 (0x1.8p+52) + +#if WANT_SIMD_EXCEPT + +#define TinyBound 0x200 /* top12 (asuint64 (0x1p-511)). */ +#define BigBound 0x408 /* top12 (asuint64 (0x1p9)). */ + +VPCS_ATTR static NOINLINE v_f64_t +specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine to special lanes. 
*/ + return v_call_f64 (exp, x, y, cmp); +} + +#else + #define Thres v_f64 (704.0) VPCS_ATTR @@ -54,6 +70,8 @@ specialcase (v_f64_t s, v_f64_t y, v_f64_t n) return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0))); } +#endif + VPCS_ATTR v_f64_t V_NAME(exp) (v_f64_t x) @@ -61,7 +79,18 @@ V_NAME(exp) (v_f64_t x) v_f64_t n, r, r2, s, y, z; v_u64_t cmp, u, e, i; +#if WANT_SIMD_EXCEPT + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + specialcase to fix special lanes later. This is only necessary if fenv + exceptions are to be triggered correctly. */ + v_f64_t xm = x; + cmp = v_cond_u64 ((v_as_u64_f64 (v_abs_f64 (x)) >> 52) - TinyBound + >= BigBound - TinyBound); + if (unlikely (v_any_u64 (cmp))) + x = v_sel_f64 (cmp, v_f64 (1), x); +#else cmp = v_cond_u64 (v_abs_f64 (x) > Thres); +#endif /* n = round(x/(ln2/N)). */ z = v_fma_f64 (x, InvLn2, Shift); @@ -87,7 +116,12 @@ V_NAME(exp) (v_f64_t x) s = v_as_f64_u64 (u + e); if (unlikely (v_any_u64 (cmp))) +#if WANT_SIMD_EXCEPT + return specialcase (xm, v_fma_f64 (y, s, s), cmp); +#else return specialcase (s, y, n); +#endif + return v_fma_f64 (y, s, s); } VPCS_ALIAS diff --git a/math/v_exp.h b/math/v_exp.h index 305da19..1e7f7f3 100644 --- a/math/v_exp.h +++ b/math/v_exp.h @@ -2,7 +2,7 @@ * Declarations for double-precision e^x vector function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" diff --git a/math/v_exp2f.c b/math/v_exp2f.c index e3ea5af..7f40dba 100644 --- a/math/v_exp2f.c +++ b/math/v_exp2f.c @@ -1,8 +1,8 @@ /* * Single-precision vector 2^x function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -25,6 +25,22 @@ static const float Poly[] = { #define Shift v_f32 (0x1.8p23f) +#if WANT_SIMD_EXCEPT + +#define TinyBound 0x20000000 /* asuint (0x1p-63). */ +#define BigBound 0x42800000 /* asuint (0x1p6). */ + +VPCS_ATTR +static NOINLINE v_f32_t +specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine to special lanes. */ + return v_call_f32 (exp2f, x, y, cmp); +} + +#else + VPCS_ATTR static v_f32_t specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale) @@ -41,15 +57,28 @@ specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); } +#endif + VPCS_ATTR v_f32_t V_NAME(exp2f) (v_f32_t x) { - v_f32_t n, r, r2, scale, p, q, poly, absn; + v_f32_t n, r, r2, scale, p, q, poly; v_u32_t cmp, e; - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ +#if WANT_SIMD_EXCEPT + cmp = v_cond_u32 ((v_as_u32_f32 (x) & 0x7fffffff) - TinyBound + >= BigBound - TinyBound); + v_f32_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + specialcase to fix special lanes later. This is only necessary if fenv + exceptions are to be triggered correctly. */ + if (unlikely (v_any_u32 (cmp))) + x = v_sel_f32 (cmp, v_f32 (1), x); +#endif + + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. 
*/ #if 0 v_f32_t z; z = x + Shift; @@ -62,16 +91,26 @@ V_NAME(exp2f) (v_f32_t x) e = v_as_u32_s32 (v_round_s32 (x)) << 23; #endif scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); + +#if !WANT_SIMD_EXCEPT + v_f32_t absn = v_abs_f32 (n); cmp = v_cond_u32 (absn > v_f32 (126.0f)); +#endif + r2 = r * r; p = v_fma_f32 (C0, r, C1); q = v_fma_f32 (C2, r, C3); q = v_fma_f32 (p, r2, q); p = C4 * r; poly = v_fma_f32 (q, r2, p); + if (unlikely (v_any_u32 (cmp))) +#if WANT_SIMD_EXCEPT + return specialcase (xm, v_fma_f32 (poly, scale, scale), cmp); +#else return specialcase (poly, n, e, absn, cmp, scale); +#endif + return v_fma_f32 (poly, scale, scale); } VPCS_ALIAS diff --git a/math/v_exp2f_1u.c b/math/v_exp2f_1u.c index 1caa14d..de1a32d 100644 --- a/math/v_exp2f_1u.c +++ b/math/v_exp2f_1u.c @@ -2,7 +2,7 @@ * Single-precision vector 2^x function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_exp_data.c b/math/v_exp_data.c index 3653554..30421da 100644 --- a/math/v_exp_data.c +++ b/math/v_exp_data.c @@ -2,7 +2,7 @@ * Lookup table for double-precision e^x vector function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_exp.h" diff --git a/math/v_expf.c b/math/v_expf.c index d403e00..ade23b2 100644 --- a/math/v_expf.c +++ b/math/v_expf.c @@ -1,8 +1,8 @@ /* * Single-precision vector e^x function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -28,6 +28,22 @@ static const float Poly[] = { #define Ln2hi v_f32 (0x1.62e4p-1f) #define Ln2lo v_f32 (0x1.7f7d1cp-20f) +#if WANT_SIMD_EXCEPT + +#define TinyBound 0x20000000 /* asuint (0x1p-63). 
*/ +#define BigBound 0x42800000 /* asuint (0x1p6). */ + +VPCS_ATTR +static NOINLINE v_f32_t +specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine to special lanes. */ + return v_call_f32 (expf, x, y, cmp); +} + +#else + VPCS_ATTR static v_f32_t specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale) @@ -44,15 +60,28 @@ specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); } +#endif + VPCS_ATTR v_f32_t V_NAME(expf) (v_f32_t x) { - v_f32_t n, r, r2, scale, p, q, poly, absn, z; + v_f32_t n, r, r2, scale, p, q, poly, z; v_u32_t cmp, e; - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ +#if WANT_SIMD_EXCEPT + cmp = v_cond_u32 ((v_as_u32_f32 (x) & 0x7fffffff) - TinyBound + >= BigBound - TinyBound); + v_f32_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + specialcase to fix special lanes later. This is only necessary if fenv + exceptions are to be triggered correctly. */ + if (unlikely (v_any_u32 (cmp))) + x = v_sel_f32 (cmp, v_f32 (1), x); +#endif + + /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. 
*/ #if 1 z = v_fma_f32 (x, InvLn2, Shift); n = z - Shift; @@ -67,16 +96,26 @@ V_NAME(expf) (v_f32_t x) e = v_as_u32_s32 (v_round_s32 (z)) << 23; #endif scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); + +#if !WANT_SIMD_EXCEPT + v_f32_t absn = v_abs_f32 (n); cmp = v_cond_u32 (absn > v_f32 (126.0f)); +#endif + r2 = r * r; p = v_fma_f32 (C0, r, C1); q = v_fma_f32 (C2, r, C3); q = v_fma_f32 (p, r2, q); p = C4 * r; poly = v_fma_f32 (q, r2, p); + if (unlikely (v_any_u32 (cmp))) +#if WANT_SIMD_EXCEPT + return specialcase (xm, v_fma_f32 (poly, scale, scale), cmp); +#else return specialcase (poly, n, e, absn, cmp, scale); +#endif + return v_fma_f32 (poly, scale, scale); } VPCS_ALIAS diff --git a/math/v_expf_1u.c b/math/v_expf_1u.c index 023bd24..8f0ae91 100644 --- a/math/v_expf_1u.c +++ b/math/v_expf_1u.c @@ -2,7 +2,7 @@ * Single-precision vector e^x function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_log.c b/math/v_log.c index d84c740..47a8291 100644 --- a/math/v_log.c +++ b/math/v_log.c @@ -2,7 +2,7 @@ * Double-precision vector log(x) function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_log.h b/math/v_log.h index bcc2fa6..a37bbc2 100644 --- a/math/v_log.h +++ b/math/v_log.h @@ -2,7 +2,7 @@ * Declarations for double-precision log(x) vector function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" diff --git a/math/v_log_data.c b/math/v_log_data.c index 97ee5b0..ec1c8e5 100644 --- a/math/v_log_data.c +++ b/math/v_log_data.c @@ -2,7 +2,7 @@ * Lookup table for double-precision log(x) vector function. * * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_log.h" diff --git a/math/v_logf.c b/math/v_logf.c index 7373192..93a5375 100644 --- a/math/v_logf.c +++ b/math/v_logf.c @@ -2,7 +2,7 @@ * Single-precision vector log function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_math.h b/math/v_math.h index f2cc467..3289916 100644 --- a/math/v_math.h +++ b/math/v_math.h @@ -1,8 +1,8 @@ /* * Vector math abstractions. * - * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _V_MATH_H @@ -191,6 +191,11 @@ v_round_s32 (v_f32_t x) { return __builtin_lroundf (x); /* relies on -fno-math-errno. */ } +static inline v_f32_t +v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y) +{ + return p ? x : y; +} /* convert to type1 from type2. */ static inline v_f32_t v_to_f32_s32 (v_s32_t x) @@ -311,6 +316,11 @@ v_round_s64 (v_f64_t x) { return __builtin_lround (x); /* relies on -fno-math-errno. */ } +static inline v_f64_t +v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y) +{ + return p ? x : y; +} /* convert to type1 from type2. */ static inline v_f64_t v_to_f64_s64 (v_s64_t x) @@ -460,6 +470,11 @@ v_round_s32 (v_f32_t x) { return vcvtaq_s32_f32 (x); } +static inline v_f32_t +v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y) +{ + return vbslq_f32 (p, x, y); +} /* convert to type1 from type2. */ static inline v_f32_t v_to_f32_s32 (v_s32_t x) @@ -584,6 +599,11 @@ v_round_s64 (v_f64_t x) { return vcvtaq_s64_f64 (x); } +static inline v_f64_t +v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y) +{ + return vbslq_f64 (p, x, y); +} /* convert to type1 from type2. 
*/ static inline v_f64_t v_to_f64_s64 (v_s64_t x) diff --git a/math/v_pow.c b/math/v_pow.c index a209d57..05a83aa 100644 --- a/math/v_pow.c +++ b/math/v_pow.c @@ -2,7 +2,7 @@ * Double-precision vector pow function. * * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_powf.c b/math/v_powf.c index fb80fa6..ad8ab8d 100644 --- a/math/v_powf.c +++ b/math/v_powf.c @@ -2,7 +2,7 @@ * Single-precision vector powf function. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" diff --git a/math/v_sin.c b/math/v_sin.c index 2b9ed05..9dbb9de 100644 --- a/math/v_sin.c +++ b/math/v_sin.c @@ -1,8 +1,8 @@ /* * Double-precision vector sin function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -34,9 +34,15 @@ static const double Poly[] = { #define Pi2 v_f64 (0x1.1a62633145c06p-53) #define Pi3 v_f64 (0x1.c1cd129024e09p-106) #define Shift v_f64 (0x1.8p52) -#define RangeVal v_f64 (0x1p23) #define AbsMask v_u64 (0x7fffffffffffffff) +#if WANT_SIMD_EXCEPT +#define TinyBound 0x202 /* top12 (asuint64 (0x1p-509)). */ +#define Thresh 0x214 /* top12 (asuint64 (RangeVal)) - TinyBound. 
*/ +#else +#define RangeVal v_f64 (0x1p23) +#endif + VPCS_ATTR __attribute__ ((noinline)) static v_f64_t specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) @@ -49,11 +55,22 @@ v_f64_t V_NAME(sin) (v_f64_t x) { v_f64_t n, r, r2, y; - v_u64_t sign, odd, cmp; + v_u64_t sign, odd, cmp, ir; - r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); + ir = v_as_u64_f64 (x) & AbsMask; + r = v_as_f64_u64 (ir); sign = v_as_u64_f64 (x) & ~AbsMask; - cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); + +#if WANT_SIMD_EXCEPT + /* Detect |x| <= 0x1p-509 or |x| >= RangeVal. If fenv exceptions are to be + triggered correctly, set any special lanes to 1 (which is neutral w.r.t. + fenv). These lanes will be fixed by specialcase later. */ + cmp = v_cond_u64 ((ir >> 52) - TinyBound >= Thresh); + if (unlikely (v_any_u64 (cmp))) + r = v_sel_f64 (cmp, v_f64 (1), r); +#else + cmp = v_cond_u64 (ir >= v_as_u64_f64 (RangeVal)); +#endif /* n = rint(|x|/pi). */ n = v_fma_f64 (InvPi, r, Shift); diff --git a/math/v_sinf.c b/math/v_sinf.c index e66bfce..ce35dac 100644 --- a/math/v_sinf.c +++ b/math/v_sinf.c @@ -1,8 +1,8 @@ /* * Single-precision vector sin function. * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" @@ -24,6 +24,7 @@ static const float Poly[] = { #define A7 v_f32 (Poly[1]) #define A9 v_f32 (Poly[0]) #define RangeVal v_f32 (0x1p20f) +#define TinyBound v_f32 (0x1p-61f) #define InvPi v_f32 (0x1.45f306p-2f) #define Shift v_f32 (0x1.8p+23f) #define AbsMask v_u32 (0x7fffffff) @@ -41,11 +42,23 @@ v_f32_t V_NAME(sinf) (v_f32_t x) { v_f32_t n, r, r2, y; - v_u32_t sign, odd, cmp; + v_u32_t sign, odd, cmp, ir; - r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); + ir = v_as_u32_f32 (x) & AbsMask; + r = v_as_f32_u32 (ir); sign = v_as_u32_f32 (x) & ~AbsMask; - cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); + +#if WANT_SIMD_EXCEPT + cmp = v_cond_u32 ((ir - v_as_u32_f32 (TinyBound) + >= v_as_u32_f32 (RangeVal) - v_as_u32_f32 (TinyBound))); + if (unlikely (v_any_u32 (cmp))) + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + specialcase later. */ + r = v_sel_f32 (cmp, v_f32 (1), r); +#else + cmp = v_cond_u32 (ir >= v_as_u32_f32 (RangeVal)); +#endif /* n = rint(|x|/pi) */ n = v_fma_f32 (InvPi, r, Shift); diff --git a/math/vn_cos.c b/math/vn_cos.c index b57a549..4b5b237 100644 --- a/math/vn_cos.c +++ b/math/vn_cos.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_cos. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_cosf.c b/math/vn_cosf.c index 6321d46..86dd26e 100644 --- a/math/vn_cosf.c +++ b/math/vn_cosf.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_cosf. * * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_exp.c b/math/vn_exp.c index 06e269d..0d85b17 100644 --- a/math/vn_exp.c +++ b/math/vn_exp.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_exp. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_exp2f.c b/math/vn_exp2f.c index db9707e..da3bb40 100644 --- a/math/vn_exp2f.c +++ b/math/vn_exp2f.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_exp2f. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_exp2f_1u.c b/math/vn_exp2f_1u.c index 17bd0ab..3e3a247 100644 --- a/math/vn_exp2f_1u.c +++ b/math/vn_exp2f_1u.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_exp2f_1u. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_expf.c b/math/vn_expf.c index 0652907..6e91a94 100644 --- a/math/vn_expf.c +++ b/math/vn_expf.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_expf. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_expf_1u.c b/math/vn_expf_1u.c index 3be7768..57ae6a3 100644 --- a/math/vn_expf_1u.c +++ b/math/vn_expf_1u.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_expf_1u. * * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_log.c b/math/vn_log.c index b58fe8f..902bff1 100644 --- a/math/vn_log.c +++ b/math/vn_log.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_log. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_logf.c b/math/vn_logf.c index cc5b8ae..07e4936 100644 --- a/math/vn_logf.c +++ b/math/vn_logf.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_logf. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_pow.c b/math/vn_pow.c index 2609501..1a980ff 100644 --- a/math/vn_pow.c +++ b/math/vn_pow.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_pow. * * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_powf.c b/math/vn_powf.c index 095d07e..a42ade3 100644 --- a/math/vn_powf.c +++ b/math/vn_powf.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_powf. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_sin.c b/math/vn_sin.c index 905c796..64b05c8 100644 --- a/math/vn_sin.c +++ b/math/vn_sin.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_sin. * * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs diff --git a/math/vn_sinf.c b/math/vn_sinf.c index 1214e1a..6e880c6 100644 --- a/math/vn_sinf.c +++ b/math/vn_sinf.c @@ -2,7 +2,7 @@ * AdvSIMD vector PCS variant of __v_sinf. * * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "mathlib.h" #ifdef __vpcs |