aboutsummaryrefslogtreecommitdiff
path: root/math
diff options
context:
space:
mode:
Diffstat (limited to 'math')
-rw-r--r--math/Dir.mk15
-rw-r--r--math/README.contributors78
-rw-r--r--math/cosf.c6
-rw-r--r--math/erf.c2
-rw-r--r--math/erf_data.c2
-rw-r--r--math/erff.c2
-rw-r--r--math/erff_data.c2
-rw-r--r--math/exp.c2
-rw-r--r--math/exp2.c2
-rw-r--r--math/exp2f.c2
-rw-r--r--math/exp2f_data.c2
-rw-r--r--math/exp_data.c2
-rw-r--r--math/expf.c2
-rw-r--r--math/include/mathlib.h2
-rw-r--r--math/log.c2
-rw-r--r--math/log2.c2
-rw-r--r--math/log2_data.c2
-rw-r--r--math/log2f.c2
-rw-r--r--math/log2f_data.c2
-rw-r--r--math/log_data.c2
-rw-r--r--math/logf.c6
-rw-r--r--math/logf_data.c2
-rw-r--r--math/math_config.h2
-rw-r--r--math/math_err.c2
-rw-r--r--math/math_errf.c2
-rw-r--r--math/pow.c2
-rw-r--r--math/pow_log_data.c2
-rw-r--r--math/powf.c2
-rw-r--r--math/powf_log2_data.c2
-rw-r--r--math/s_cos.c2
-rw-r--r--math/s_cosf.c2
-rw-r--r--math/s_exp.c2
-rw-r--r--math/s_exp2f.c2
-rw-r--r--math/s_exp2f_1u.c2
-rw-r--r--math/s_expf.c2
-rw-r--r--math/s_expf_1u.c2
-rw-r--r--math/s_log.c2
-rw-r--r--math/s_logf.c2
-rw-r--r--math/s_pow.c2
-rw-r--r--math/s_powf.c2
-rw-r--r--math/s_sin.c2
-rw-r--r--math/s_sinf.c2
-rw-r--r--math/sincosf.c6
-rw-r--r--math/sincosf.h6
-rw-r--r--math/sincosf_data.c2
-rw-r--r--math/sinf.c6
-rw-r--r--math/test/mathbench.c281
-rw-r--r--math/test/mathbench_funcs.h100
-rw-r--r--math/test/mathbench_wrappers.h104
-rw-r--r--math/test/mathtest.c9
-rw-r--r--math/test/rtest/dotest.c2
-rw-r--r--math/test/rtest/intern.h2
-rw-r--r--math/test/rtest/main.c2
-rw-r--r--math/test/rtest/random.c2
-rw-r--r--math/test/rtest/random.h2
-rw-r--r--math/test/rtest/semi.c2
-rw-r--r--math/test/rtest/semi.h2
-rw-r--r--math/test/rtest/types.h2
-rw-r--r--math/test/rtest/wrappers.c2
-rw-r--r--math/test/rtest/wrappers.h2
-rwxr-xr-xmath/test/runulp.sh47
-rw-r--r--math/test/testcases/directed/cosf.tst2
-rw-r--r--math/test/testcases/directed/erf.tst2
-rw-r--r--math/test/testcases/directed/erff.tst2
-rw-r--r--math/test/testcases/directed/exp.tst2
-rw-r--r--math/test/testcases/directed/exp2.tst2
-rw-r--r--math/test/testcases/directed/exp2f.tst2
-rw-r--r--math/test/testcases/directed/expf.tst2
-rw-r--r--math/test/testcases/directed/log.tst2
-rw-r--r--math/test/testcases/directed/log2.tst2
-rw-r--r--math/test/testcases/directed/log2f.tst2
-rw-r--r--math/test/testcases/directed/logf.tst2
-rw-r--r--math/test/testcases/directed/pow.tst2
-rw-r--r--math/test/testcases/directed/powf.tst2
-rw-r--r--math/test/testcases/directed/sincosf.tst2
-rw-r--r--math/test/testcases/directed/sinf.tst2
-rw-r--r--math/test/testcases/random/double.tst2
-rw-r--r--math/test/testcases/random/float.tst2
-rw-r--r--math/test/ulp.c248
-rw-r--r--math/test/ulp.h2
-rw-r--r--math/test/ulp_funcs.h78
-rw-r--r--math/test/ulp_wrappers.h59
-rw-r--r--math/tools/cos.sollya2
-rw-r--r--math/tools/exp.sollya2
-rw-r--r--math/tools/exp2.sollya2
-rw-r--r--math/tools/log.sollya2
-rw-r--r--math/tools/log2.sollya2
-rw-r--r--math/tools/log2_abs.sollya2
-rw-r--r--math/tools/log_abs.sollya2
-rwxr-xr-xmath/tools/plot.py2
-rwxr-xr-xmath/tools/remez.jl2
-rw-r--r--math/tools/sin.sollya2
-rw-r--r--math/tools/v_exp.sollya2
-rw-r--r--math/tools/v_log.sollya2
-rw-r--r--math/tools/v_sin.sollya2
-rw-r--r--math/v_cos.c12
-rw-r--r--math/v_cosf.c12
-rw-r--r--math/v_exp.c38
-rw-r--r--math/v_exp.h2
-rw-r--r--math/v_exp2f.c51
-rw-r--r--math/v_exp2f_1u.c2
-rw-r--r--math/v_exp_data.c2
-rw-r--r--math/v_expf.c51
-rw-r--r--math/v_expf_1u.c2
-rw-r--r--math/v_log.c2
-rw-r--r--math/v_log.h2
-rw-r--r--math/v_log_data.c2
-rw-r--r--math/v_logf.c2
-rw-r--r--math/v_math.h24
-rw-r--r--math/v_pow.c2
-rw-r--r--math/v_powf.c2
-rw-r--r--math/v_sin.c29
-rw-r--r--math/v_sinf.c23
-rw-r--r--math/vn_cos.c2
-rw-r--r--math/vn_cosf.c2
-rw-r--r--math/vn_exp.c2
-rw-r--r--math/vn_exp2f.c2
-rw-r--r--math/vn_exp2f_1u.c2
-rw-r--r--math/vn_expf.c2
-rw-r--r--math/vn_expf_1u.c2
-rw-r--r--math/vn_log.c2
-rw-r--r--math/vn_logf.c2
-rw-r--r--math/vn_pow.c2
-rw-r--r--math/vn_powf.c2
-rw-r--r--math/vn_sin.c2
-rw-r--r--math/vn_sinf.c2
126 files changed, 1020 insertions, 475 deletions
diff --git a/math/Dir.mk b/math/Dir.mk
index 3b841ab..2a9cad1 100644
--- a/math/Dir.mk
+++ b/math/Dir.mk
@@ -1,7 +1,7 @@
# Makefile fragment - requires GNU make
#
-# Copyright (c) 2019, Arm Limited.
-# SPDX-License-Identifier: MIT
+# Copyright (c) 2019-2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
S := $(srcdir)/math
B := build/math
@@ -15,6 +15,7 @@ math-test-srcs := \
math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS])
math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))
+math-test-includes := $(patsubst $(S)/%,build/include/%,$(wildcard $(S)/test/*.h))
math-libs := \
build/lib/libmathlib.so \
@@ -42,10 +43,11 @@ math-files := \
$(math-tools) \
$(math-host-tools) \
$(math-includes) \
+ $(math-test-includes) \
-all-math: $(math-libs) $(math-tools) $(math-includes)
+all-math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes)
-$(math-objs): $(math-includes)
+$(math-objs): $(math-includes) $(math-test-includes)
$(math-objs): CFLAGS_ALL += $(math-cflags)
$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
$(math-host-objs): CC = $(HOST_CC)
@@ -83,6 +85,9 @@ build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a
build/include/%.h: $(S)/include/%.h
cp $< $@
+build/include/test/%.h: $(S)/test/%.h
+ cp $< $@
+
build/bin/%.sh: $(S)/test/%.sh
cp $< $@
@@ -96,7 +101,7 @@ check-math-rtest: $(math-host-tools) $(math-tools)
cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags)
check-math-ulp: $(math-tools)
- ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR)
+ ULPFLAGS="$(math-ulpflags)" WANT_SIMD_EXCEPT="$(WANT_SIMD_EXCEPT)" build/bin/runulp.sh $(EMULATOR)
check-math: check-math-test check-math-rtest check-math-ulp
diff --git a/math/README.contributors b/math/README.contributors
new file mode 100644
index 0000000..33e7ba3
--- /dev/null
+++ b/math/README.contributors
@@ -0,0 +1,78 @@
+STYLE REQUIREMENTS
+==================
+
+1. Most code in this sub-directory is expected to be upstreamed into glibc so
+ the GNU Coding Standard and glibc specific conventions should be followed
+ to ease upstreaming.
+
+2. ABI and symbols: the code should be written so it is suitable for inclusion
+ into a libc with minimal changes. This e.g. means that internal symbols
+ should be hidden and in the implementation reserved namespace according to
+ ISO C and POSIX rules. If possible the built shared libraries and static
+ library archives should be usable to override libc symbols at link time (or
+ at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI
+ (other than symbol versioning), this cannot be done reliably for static
+ linking so this is a best effort requirement.
+
+3. API: include headers should be suitable for benchmarking and testing code
+ and should not conflict with libc headers.
+
+
+CONTRIBUTION GUIDELINES FOR math SUB-DIRECTORY
+==============================================
+
+1. Math functions have quality and performance requirements.
+
+2. Quality:
+ - Worst-case ULP error should be small in the entire input domain (for most
+ common double precision scalar functions the target is < 0.66 ULP error,
+ and < 1 ULP for single precision; even a performance-optimized function
+ variant should not have > 5 ULP error if the goal is to be a drop-in
+ replacement for a standard math function). This should be tested
+ statistically (or on all inputs if possible in a reasonable amount of time).
+ The ulp tool is for this and runulp.sh should be updated for new functions.
+
+ - All standard rounding modes need to be supported but in non-default rounding
+ modes the quality requirement can be relaxed. (Non-nearest rounded
+ computation can be slow and inaccurate but has to be correct for conformance
+ reasons.)
+
+ - Special cases and error handling need to follow ISO C Annex F requirements,
+ POSIX requirements, IEEE 754-2008 requirements and Glibc requirements:
+ https://www.gnu.org/software/libc/manual/html_mono/libc.html#Errors-in-Math-Functions
+ this should be tested by direct tests (glibc test system may be used for it).
+
+ - Error handling code should be decoupled from the approximation code as much
+ as possible. (There are helper functions, these take care of errno as well
+ as exception raising.)
+
+ - Vector math code does not need to work in non-nearest rounding mode and error
+ handling side effects need not happen (fenv exceptions and errno), but the
+ result should be correct (within quality requirements, which are lower for
+ vector code than for scalar code).
+
+ - Error bounds of the approximation should be clearly documented.
+
+ - The code should build and pass tests on arm, aarch64 and x86_64 GNU/Linux
+ systems. (Routines and features can be disabled on specific targets, but
+ the build must complete). On aarch64, both little- and big-endian targets
+ are supported as well as valid combinations of architecture extensions.
+ The configurations that should be tested depend on the contribution.
+
+3. Performance:
+ - Common math code should be benchmarked on modern aarch64 microarchitectures
+ over typical inputs.
+
+ - Performance improvements should be documented (relative numbers can be
+ published; it is enough to use the mathbench microbenchmark tool which should
+ be updated for new functions).
+
+ - Attention should be paid to the compilation flags: for aarch64 fma
+ contraction should be on and math errno turned off so some builtins can be
+ inlined.
+
+ - The code should be reasonably performant on x86_64 too, e.g. some rounding
+ instructions and fma may not be available on x86_64, such builtins turn into
+ libc calls with slow code. Such slowdown is not acceptable, a faster fallback
+ should be present: glibc and bionic use the same code on all targets. (This
+ does not apply to vector math code).
diff --git a/math/cosf.c b/math/cosf.c
index f29f194..6293ce8 100644
--- a/math/cosf.c
+++ b/math/cosf.c
@@ -1,8 +1,8 @@
/*
* Single-precision cos function.
*
- * Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2021, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
@@ -22,7 +22,7 @@ cosf (float y)
int n;
const sincos_t *p = &__sincosf_table[0];
- if (abstop12 (y) < abstop12 (pio4))
+ if (abstop12 (y) < abstop12 (pio4f))
{
double x2 = x * x;
diff --git a/math/erf.c b/math/erf.c
index 12d7e51..5f9f40d 100644
--- a/math/erf.c
+++ b/math/erf.c
@@ -2,7 +2,7 @@
* Double-precision erf(x) function.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/erf_data.c b/math/erf_data.c
index 807875b..10cf1fa 100644
--- a/math/erf_data.c
+++ b/math/erf_data.c
@@ -2,7 +2,7 @@
* Shared data between erf and erfc.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/erff.c b/math/erff.c
index a58e825..9fa476d 100644
--- a/math/erff.c
+++ b/math/erff.c
@@ -2,7 +2,7 @@
* Single-precision erf(x) function.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/math/erff_data.c b/math/erff_data.c
index fa6b1ef..f822788 100644
--- a/math/erff_data.c
+++ b/math/erff_data.c
@@ -2,7 +2,7 @@
* Data for approximation of erff.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/exp.c b/math/exp.c
index 7f5024c..1de500c 100644
--- a/math/exp.c
+++ b/math/exp.c
@@ -2,7 +2,7 @@
* Double-precision e^x function.
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
diff --git a/math/exp2.c b/math/exp2.c
index 35ab39f..a1eee44 100644
--- a/math/exp2.c
+++ b/math/exp2.c
@@ -2,7 +2,7 @@
* Double-precision 2^x function.
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
diff --git a/math/exp2f.c b/math/exp2f.c
index 94b3253..776c3dd 100644
--- a/math/exp2f.c
+++ b/math/exp2f.c
@@ -2,7 +2,7 @@
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
diff --git a/math/exp2f_data.c b/math/exp2f_data.c
index 3fb0ad1..f0cb7fc 100644
--- a/math/exp2f_data.c
+++ b/math/exp2f_data.c
@@ -2,7 +2,7 @@
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/exp_data.c b/math/exp_data.c
index cba7683..714c845 100644
--- a/math/exp_data.c
+++ b/math/exp_data.c
@@ -2,7 +2,7 @@
* Shared data between exp, exp2 and pow.
*
* Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/expf.c b/math/expf.c
index 9b2f0c3..08a20d5 100644
--- a/math/expf.c
+++ b/math/expf.c
@@ -2,7 +2,7 @@
* Single-precision e^x function.
*
* Copyright (c) 2017-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 279d829..c520c37 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -2,7 +2,7 @@
* Public API.
*
* Copyright (c) 2015-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _MATHLIB_H
diff --git a/math/log.c b/math/log.c
index d3b7bc6..43dfc2a 100644
--- a/math/log.c
+++ b/math/log.c
@@ -2,7 +2,7 @@
* Double-precision log(x) function.
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
diff --git a/math/log2.c b/math/log2.c
index 55102b7..3f9c21b 100644
--- a/math/log2.c
+++ b/math/log2.c
@@ -2,7 +2,7 @@
* Double-precision log2(x) function.
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
diff --git a/math/log2_data.c b/math/log2_data.c
index 3fc9b47..293bd7d 100644
--- a/math/log2_data.c
+++ b/math/log2_data.c
@@ -2,7 +2,7 @@
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/log2f.c b/math/log2f.c
index acb629e..0a44fa2 100644
--- a/math/log2f.c
+++ b/math/log2f.c
@@ -2,7 +2,7 @@
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
diff --git a/math/log2f_data.c b/math/log2f_data.c
index f3546d7..4866ef7 100644
--- a/math/log2f_data.c
+++ b/math/log2f_data.c
@@ -2,7 +2,7 @@
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/log_data.c b/math/log_data.c
index 96a098d..3ecc1f4 100644
--- a/math/log_data.c
+++ b/math/log_data.c
@@ -2,7 +2,7 @@
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/logf.c b/math/logf.c
index cfbaee1..820f74c 100644
--- a/math/logf.c
+++ b/math/logf.c
@@ -1,8 +1,8 @@
/*
* Single-precision log function.
*
- * Copyright (c) 2017-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
@@ -57,7 +57,7 @@ logf (float x)
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t) tmp >> 23; /* arithmetic shift */
- iz = ix - (tmp & 0x1ff << 23);
+ iz = ix - (tmp & 0xff800000);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t) asfloat (iz);
diff --git a/math/logf_data.c b/math/logf_data.c
index e8973ce..0424768 100644
--- a/math/logf_data.c
+++ b/math/logf_data.c
@@ -2,7 +2,7 @@
* Data definition for logf.
*
* Copyright (c) 2017-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/math_config.h b/math/math_config.h
index e851043..7ffc0cd 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -2,7 +2,7 @@
* Configuration for math routines.
*
* Copyright (c) 2017-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _MATH_CONFIG_H
diff --git a/math/math_err.c b/math/math_err.c
index 1bf9538..cfe0728 100644
--- a/math/math_err.c
+++ b/math/math_err.c
@@ -2,7 +2,7 @@
* Double-precision math error handling.
*
* Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/math_errf.c b/math/math_errf.c
index d5350b8..4233918 100644
--- a/math/math_errf.c
+++ b/math/math_errf.c
@@ -2,7 +2,7 @@
* Single-precision math error handling.
*
* Copyright (c) 2017-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/pow.c b/math/pow.c
index 86842c6..af719fe 100644
--- a/math/pow.c
+++ b/math/pow.c
@@ -2,7 +2,7 @@
* Double-precision x^y function.
*
* Copyright (c) 2018-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
diff --git a/math/pow_log_data.c b/math/pow_log_data.c
index 45569c5..2a4c250 100644
--- a/math/pow_log_data.c
+++ b/math/pow_log_data.c
@@ -2,7 +2,7 @@
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/powf.c b/math/powf.c
index 6ba45d3..05c80bb 100644
--- a/math/powf.c
+++ b/math/powf.c
@@ -2,7 +2,7 @@
* Single-precision pow function.
*
* Copyright (c) 2017-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
diff --git a/math/powf_log2_data.c b/math/powf_log2_data.c
index 97e0d98..243836a 100644
--- a/math/powf_log2_data.c
+++ b/math/powf_log2_data.c
@@ -2,7 +2,7 @@
* Data definition for powf.
*
* Copyright (c) 2017-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "math_config.h"
diff --git a/math/s_cos.c b/math/s_cos.c
index 53a95b0..e66d563 100644
--- a/math/s_cos.c
+++ b/math/s_cos.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_cos.c"
diff --git a/math/s_cosf.c b/math/s_cosf.c
index 914c02e..f615d26 100644
--- a/math/s_cosf.c
+++ b/math/s_cosf.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_cosf.c"
diff --git a/math/s_exp.c b/math/s_exp.c
index ac7246b..5da0099 100644
--- a/math/s_exp.c
+++ b/math/s_exp.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp.c"
diff --git a/math/s_exp2f.c b/math/s_exp2f.c
index df7dfd6..dcbfea9 100644
--- a/math/s_exp2f.c
+++ b/math/s_exp2f.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp2f.c"
diff --git a/math/s_exp2f_1u.c b/math/s_exp2f_1u.c
index 5e3852b..bf387e4 100644
--- a/math/s_exp2f_1u.c
+++ b/math/s_exp2f_1u.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp2f_1u.c"
diff --git a/math/s_expf.c b/math/s_expf.c
index 3492c46..dacda7f 100644
--- a/math/s_expf.c
+++ b/math/s_expf.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_expf.c"
diff --git a/math/s_expf_1u.c b/math/s_expf_1u.c
index eb7bbcb..0009644 100644
--- a/math/s_expf_1u.c
+++ b/math/s_expf_1u.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_expf_1u.c"
diff --git a/math/s_log.c b/math/s_log.c
index 23289cf..27d2eb2 100644
--- a/math/s_log.c
+++ b/math/s_log.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_log.c"
diff --git a/math/s_logf.c b/math/s_logf.c
index 9399350..7d98b2b 100644
--- a/math/s_logf.c
+++ b/math/s_logf.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_logf.c"
diff --git a/math/s_pow.c b/math/s_pow.c
index 2e34c9f..6eca2b2 100644
--- a/math/s_pow.c
+++ b/math/s_pow.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_pow.c"
diff --git a/math/s_powf.c b/math/s_powf.c
index 6d91a4a..1d55d90 100644
--- a/math/s_powf.c
+++ b/math/s_powf.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_powf.c"
diff --git a/math/s_sin.c b/math/s_sin.c
index 06982c2..0c61712 100644
--- a/math/s_sin.c
+++ b/math/s_sin.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_sin.c"
diff --git a/math/s_sinf.c b/math/s_sinf.c
index 68ca908..3aae611 100644
--- a/math/s_sinf.c
+++ b/math/s_sinf.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_sinf.c"
diff --git a/math/sincosf.c b/math/sincosf.c
index 9746f1c..446f21d 100644
--- a/math/sincosf.c
+++ b/math/sincosf.c
@@ -1,8 +1,8 @@
/*
* Single-precision sin/cos function.
*
- * Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2021, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
@@ -22,7 +22,7 @@ sincosf (float y, float *sinp, float *cosp)
int n;
const sincos_t *p = &__sincosf_table[0];
- if (abstop12 (y) < abstop12 (pio4))
+ if (abstop12 (y) < abstop12 (pio4f))
{
double x2 = x * x;
diff --git a/math/sincosf.h b/math/sincosf.h
index 1e80fc9..ec23ed7 100644
--- a/math/sincosf.h
+++ b/math/sincosf.h
@@ -1,8 +1,8 @@
/*
* Header for sinf, cosf and sincosf.
*
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2021, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
@@ -12,7 +12,7 @@
/* 2PI * 2^-64. */
static const double pi63 = 0x1.921FB54442D18p-62;
/* PI / 4. */
-static const double pio4 = 0x1.921FB54442D18p-1;
+static const float pio4f = 0x1.921FB6p-1f;
/* The constants and polynomials for sine and cosine. */
typedef struct
diff --git a/math/sincosf_data.c b/math/sincosf_data.c
index ab4ac47..2252529 100644
--- a/math/sincosf_data.c
+++ b/math/sincosf_data.c
@@ -2,7 +2,7 @@
* Data definition for sinf, cosf and sincosf.
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/math/sinf.c b/math/sinf.c
index ddbc1da..8dd8ae4 100644
--- a/math/sinf.c
+++ b/math/sinf.c
@@ -1,8 +1,8 @@
/*
* Single-precision sin function.
*
- * Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2021, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <math.h>
@@ -21,7 +21,7 @@ sinf (float y)
int n;
const sincos_t *p = &__sincosf_table[0];
- if (abstop12 (y) < abstop12 (pio4))
+ if (abstop12 (y) < abstop12 (pio4f))
{
s = x * x;
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 0c17826..6e18e36 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -1,8 +1,8 @@
/*
* Microbenchmark for math functions.
*
- * Copyright (c) 2018-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#undef _GNU_SOURCE
@@ -66,6 +66,43 @@ v_float_dup (float x)
{
return (v_float){x, x, x, x};
}
+#if WANT_SVE_MATH
+#include <arm_sve.h>
+typedef svbool_t sv_bool;
+typedef svfloat64_t sv_double;
+
+#define sv_double_len() svcntd()
+
+static inline sv_double
+sv_double_load (const double *p)
+{
+ svbool_t pg = svptrue_b64();
+ return svld1(pg, p);
+}
+
+static inline sv_double
+sv_double_dup (double x)
+{
+ return svdup_n_f64(x);
+}
+
+typedef svfloat32_t sv_float;
+
+#define sv_float_len() svcntw()
+
+static inline sv_float
+sv_float_load (const float *p)
+{
+ svbool_t pg = svptrue_b32();
+ return svld1(pg, p);
+}
+
+static inline sv_float
+sv_float_dup (float x)
+{
+ return svdup_n_f32(x);
+}
+#endif
#else
/* dummy definitions to make things compile. */
typedef double v_double;
@@ -89,7 +126,6 @@ dummyf (float x)
{
return x;
}
-
#if WANT_VMATH
#if __aarch64__
static v_double
@@ -116,101 +152,25 @@ __vn_dummyf (v_float x)
{
return x;
}
-
-__vpcs static v_float
-xy__vn_powf (v_float x)
-{
- return __vn_powf (x, x);
-}
-
-__vpcs static v_float
-xy_Z_powf (v_float x)
+#endif
+#if WANT_SVE_MATH
+static sv_double
+__sv_dummy (sv_double x, sv_bool pg)
{
- return _ZGVnN4vv_powf (x, x);
+ return x;
}
-__vpcs static v_double
-xy__vn_pow (v_double x)
+static sv_float
+__sv_dummyf (sv_float x, sv_bool pg)
{
- return __vn_pow (x, x);
+ return x;
}
-__vpcs static v_double
-xy_Z_pow (v_double x)
-{
- return _ZGVnN2vv_pow (x, x);
-}
#endif
-
-static v_float
-xy__v_powf (v_float x)
-{
- return __v_powf (x, x);
-}
-
-static v_double
-xy__v_pow (v_double x)
-{
- return __v_pow (x, x);
-}
#endif
-
-static float
-xy__s_powf (float x)
-{
- return __s_powf (x, x);
-}
-
-static double
-xy__s_pow (double x)
-{
- return __s_pow (x, x);
-}
#endif
-static double
-xypow (double x)
-{
- return pow (x, x);
-}
-
-static float
-xypowf (float x)
-{
- return powf (x, x);
-}
-
-static double
-xpow (double x)
-{
- return pow (x, 23.4);
-}
-
-static float
-xpowf (float x)
-{
- return powf (x, 23.4f);
-}
-
-static double
-ypow (double x)
-{
- return pow (2.34, x);
-}
-
-static float
-ypowf (float x)
-{
- return powf (2.34f, x);
-}
-
-static float
-sincosf_wrap (float x)
-{
- float s, c;
- sincosf (x, &s, &c);
- return s + c;
-}
+#include "test/mathbench_wrappers.h"
static const struct fun
{
@@ -229,6 +189,10 @@ static const struct fun
__vpcs v_double (*vnd) (v_double);
__vpcs v_float (*vnf) (v_float);
#endif
+#if WANT_SVE_MATH
+ sv_double (*svd) (sv_double, sv_bool);
+ sv_float (*svf) (sv_float, sv_bool);
+#endif
} fun;
} funtab[] = {
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
@@ -237,106 +201,25 @@ static const struct fun
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
+#define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}},
+#define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
D (dummy, 1.0, 2.0)
-D (exp, -9.9, 9.9)
-D (exp, 0.5, 1.0)
-D (exp2, -9.9, 9.9)
-D (log, 0.01, 11.1)
-D (log, 0.999, 1.001)
-D (log2, 0.01, 11.1)
-D (log2, 0.999, 1.001)
-{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
-D (xpow, 0.01, 11.1)
-D (ypow, -9.9, 9.9)
-D (erf, -6.0, 6.0)
-
F (dummyf, 1.0, 2.0)
-F (expf, -9.9, 9.9)
-F (exp2f, -9.9, 9.9)
-F (logf, 0.01, 11.1)
-F (log2f, 0.01, 11.1)
-{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
-F (xpowf, 0.01, 11.1)
-F (ypowf, -9.9, 9.9)
-{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
-{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
-{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
-{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
-{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
-{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
-F (sinf, 0.1, 0.7)
-F (sinf, 0.8, 3.1)
-F (sinf, -3.1, 3.1)
-F (sinf, 3.3, 33.3)
-F (sinf, 100, 1000)
-F (sinf, 1e6, 1e32)
-F (cosf, 0.1, 0.7)
-F (cosf, 0.8, 3.1)
-F (cosf, -3.1, 3.1)
-F (cosf, 3.3, 33.3)
-F (cosf, 100, 1000)
-F (cosf, 1e6, 1e32)
-F (erff, -4.0, 4.0)
#if WANT_VMATH
-D (__s_sin, -3.1, 3.1)
-D (__s_cos, -3.1, 3.1)
-D (__s_exp, -9.9, 9.9)
-D (__s_log, 0.01, 11.1)
-{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
-F (__s_expf, -9.9, 9.9)
-F (__s_expf_1u, -9.9, 9.9)
-F (__s_exp2f, -9.9, 9.9)
-F (__s_exp2f_1u, -9.9, 9.9)
-F (__s_logf, 0.01, 11.1)
-{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
-F (__s_sinf, -3.1, 3.1)
-F (__s_cosf, -3.1, 3.1)
#if __aarch64__
VD (__v_dummy, 1.0, 2.0)
-VD (__v_sin, -3.1, 3.1)
-VD (__v_cos, -3.1, 3.1)
-VD (__v_exp, -9.9, 9.9)
-VD (__v_log, 0.01, 11.1)
-{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
VF (__v_dummyf, 1.0, 2.0)
-VF (__v_expf, -9.9, 9.9)
-VF (__v_expf_1u, -9.9, 9.9)
-VF (__v_exp2f, -9.9, 9.9)
-VF (__v_exp2f_1u, -9.9, 9.9)
-VF (__v_logf, 0.01, 11.1)
-{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
-VF (__v_sinf, -3.1, 3.1)
-VF (__v_cosf, -3.1, 3.1)
#ifdef __vpcs
VND (__vn_dummy, 1.0, 2.0)
-VND (__vn_exp, -9.9, 9.9)
-VND (_ZGVnN2v_exp, -9.9, 9.9)
-VND (__vn_log, 0.01, 11.1)
-VND (_ZGVnN2v_log, 0.01, 11.1)
-{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
-{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
-VND (__vn_sin, -3.1, 3.1)
-VND (_ZGVnN2v_sin, -3.1, 3.1)
-VND (__vn_cos, -3.1, 3.1)
-VND (_ZGVnN2v_cos, -3.1, 3.1)
VNF (__vn_dummyf, 1.0, 2.0)
-VNF (__vn_expf, -9.9, 9.9)
-VNF (_ZGVnN4v_expf, -9.9, 9.9)
-VNF (__vn_expf_1u, -9.9, 9.9)
-VNF (__vn_exp2f, -9.9, 9.9)
-VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
-VNF (__vn_exp2f_1u, -9.9, 9.9)
-VNF (__vn_logf, 0.01, 11.1)
-VNF (_ZGVnN4v_logf, 0.01, 11.1)
-{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
-{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
-VNF (__vn_sinf, -3.1, 3.1)
-VNF (_ZGVnN4v_sinf, -3.1, 3.1)
-VNF (__vn_cosf, -3.1, 3.1)
-VNF (_ZGVnN4v_cosf, -3.1, 3.1)
+#endif
+#if WANT_SVE_MATH
+SVD (__sv_dummy, 1.0, 2.0)
+SVF (__sv_dummyf, 1.0, 2.0)
#endif
#endif
#endif
+#include "test/mathbench_funcs.h"
{0},
#undef F
#undef D
@@ -344,6 +227,8 @@ VNF (_ZGVnN4v_cosf, -3.1, 3.1)
#undef VD
#undef VNF
#undef VND
+#undef SVF
+#undef SVD
};
static void
@@ -508,6 +393,40 @@ runf_vn_latency (__vpcs v_float f (v_float))
}
#endif
+#if WANT_SVE_MATH
+/* Throughput benchmark for a double-precision SVE routine: calls F on
+   successive chunks of the input array A, stepping by sv_double_len ()
+   elements, under an all-true predicate.  Results are discarded, so no
+   call depends on the one before it.
+   NOTE(review): presumably sv_double_load reads one full vector; if N is
+   not a multiple of sv_double_len () the last chunk would extend past
+   index N -- confirm against the definitions of N and A.  */
+static void
+run_sv_thruput (sv_double f (sv_double, sv_bool))
+{
+ for (int i = 0; i < N; i += sv_double_len ())
+ f (sv_double_load (A+i), svptrue_b64 ());
+}
+
+/* Throughput benchmark for a single-precision SVE routine: calls F on
+   successive chunks of the input array Af, stepping by sv_float_len ()
+   elements, under an all-true predicate.  Results are discarded, so no
+   call depends on the one before it.
+   NOTE(review): presumably sv_float_load reads one full vector; if N is
+   not a multiple of sv_float_len () the last chunk would extend past
+   index N -- confirm against the definitions of N and Af.  */
+static void
+runf_sv_thruput (sv_float f (sv_float, sv_bool))
+{
+ for (int i = 0; i < N; i += sv_float_len ())
+ f (sv_float_load (Af+i), svptrue_b32 ());
+}
+
+/* Latency benchmark for a double-precision SVE routine.  The argument of
+   each call is prev * z + <chunk of A> (svmad), where prev is the result
+   of the previous call, so every call has a data dependency on the one
+   before it.  z is a vector built from the file-level `zero`
+   (presumably 0.0, which would leave the loaded input unchanged --
+   confirm at its definition).  */
+static void
+run_sv_latency (sv_double f (sv_double, sv_bool))
+{
+ sv_double z = sv_double_dup (zero);
+ sv_double prev = z;
+ for (int i = 0; i < N; i += sv_double_len ())
+ prev = f (svmad_f64_x (svptrue_b64 (), prev, z, sv_double_load (A+i)), svptrue_b64 ());
+}
+
+/* Latency benchmark for a single-precision SVE routine.  The argument of
+   each call is prev * z + <chunk of Af> (svmad), where prev is the result
+   of the previous call, so every call has a data dependency on the one
+   before it.  z is a vector built from the file-level `zero`
+   (presumably 0, which would leave the loaded input unchanged --
+   confirm at its definition).  */
+static void
+runf_sv_latency (sv_float f (sv_float, sv_bool))
+{
+ sv_float z = sv_float_dup (zero);
+ sv_float prev = z;
+ for (int i = 0; i < N; i += sv_float_len ())
+ prev = f (svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af+i)), svptrue_b32 ());
+}
+#endif
+
static uint64_t
tic (void)
{
@@ -570,6 +489,16 @@ bench1 (const struct fun *f, int type, double lo, double hi)
else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
TIMEIT (runf_vn_latency, f->fun.vnf);
#endif
+#if WANT_SVE_MATH
+ else if (f->prec == 'd' && type == 't' && f->vec == 's')
+ TIMEIT (run_sv_thruput, f->fun.svd);
+ else if (f->prec == 'd' && type == 'l' && f->vec == 's')
+ TIMEIT (run_sv_latency, f->fun.svd);
+ else if (f->prec == 'f' && type == 't' && f->vec == 's')
+ TIMEIT (runf_sv_thruput, f->fun.svf);
+ else if (f->prec == 'f' && type == 'l' && f->vec == 's')
+ TIMEIT (runf_sv_latency, f->fun.svf);
+#endif
if (type == 't')
{
diff --git a/math/test/mathbench_funcs.h b/math/test/mathbench_funcs.h
new file mode 100644
index 0000000..ad6dd2a
--- /dev/null
+++ b/math/test/mathbench_funcs.h
@@ -0,0 +1,100 @@
+/*
+ * Function entries for mathbench.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+D (exp, -9.9, 9.9)
+D (exp, 0.5, 1.0)
+D (exp2, -9.9, 9.9)
+D (log, 0.01, 11.1)
+D (log, 0.999, 1.001)
+D (log2, 0.01, 11.1)
+D (log2, 0.999, 1.001)
+{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
+D (xpow, 0.01, 11.1)
+D (ypow, -9.9, 9.9)
+D (erf, -6.0, 6.0)
+
+F (expf, -9.9, 9.9)
+F (exp2f, -9.9, 9.9)
+F (logf, 0.01, 11.1)
+F (log2f, 0.01, 11.1)
+{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
+F (xpowf, 0.01, 11.1)
+F (ypowf, -9.9, 9.9)
+{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
+F (sinf, 0.1, 0.7)
+F (sinf, 0.8, 3.1)
+F (sinf, -3.1, 3.1)
+F (sinf, 3.3, 33.3)
+F (sinf, 100, 1000)
+F (sinf, 1e6, 1e32)
+F (cosf, 0.1, 0.7)
+F (cosf, 0.8, 3.1)
+F (cosf, -3.1, 3.1)
+F (cosf, 3.3, 33.3)
+F (cosf, 100, 1000)
+F (cosf, 1e6, 1e32)
+F (erff, -4.0, 4.0)
+#if WANT_VMATH
+D (__s_sin, -3.1, 3.1)
+D (__s_cos, -3.1, 3.1)
+D (__s_exp, -9.9, 9.9)
+D (__s_log, 0.01, 11.1)
+{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
+F (__s_expf, -9.9, 9.9)
+F (__s_expf_1u, -9.9, 9.9)
+F (__s_exp2f, -9.9, 9.9)
+F (__s_exp2f_1u, -9.9, 9.9)
+F (__s_logf, 0.01, 11.1)
+{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
+F (__s_sinf, -3.1, 3.1)
+F (__s_cosf, -3.1, 3.1)
+#if __aarch64__
+VD (__v_sin, -3.1, 3.1)
+VD (__v_cos, -3.1, 3.1)
+VD (__v_exp, -9.9, 9.9)
+VD (__v_log, 0.01, 11.1)
+{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
+VF (__v_expf, -9.9, 9.9)
+VF (__v_expf_1u, -9.9, 9.9)
+VF (__v_exp2f, -9.9, 9.9)
+VF (__v_exp2f_1u, -9.9, 9.9)
+VF (__v_logf, 0.01, 11.1)
+{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
+VF (__v_sinf, -3.1, 3.1)
+VF (__v_cosf, -3.1, 3.1)
+#ifdef __vpcs
+VND (__vn_exp, -9.9, 9.9)
+VND (_ZGVnN2v_exp, -9.9, 9.9)
+VND (__vn_log, 0.01, 11.1)
+VND (_ZGVnN2v_log, 0.01, 11.1)
+{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
+{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
+VND (__vn_sin, -3.1, 3.1)
+VND (_ZGVnN2v_sin, -3.1, 3.1)
+VND (__vn_cos, -3.1, 3.1)
+VND (_ZGVnN2v_cos, -3.1, 3.1)
+VNF (__vn_expf, -9.9, 9.9)
+VNF (_ZGVnN4v_expf, -9.9, 9.9)
+VNF (__vn_expf_1u, -9.9, 9.9)
+VNF (__vn_exp2f, -9.9, 9.9)
+VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
+VNF (__vn_exp2f_1u, -9.9, 9.9)
+VNF (__vn_logf, 0.01, 11.1)
+VNF (_ZGVnN4v_logf, 0.01, 11.1)
+{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
+{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
+VNF (__vn_sinf, -3.1, 3.1)
+VNF (_ZGVnN4v_sinf, -3.1, 3.1)
+VNF (__vn_cosf, -3.1, 3.1)
+VNF (_ZGVnN4v_cosf, -3.1, 3.1)
+#endif
+#endif
+#endif
diff --git a/math/test/mathbench_wrappers.h b/math/test/mathbench_wrappers.h
new file mode 100644
index 0000000..8311f0f
--- /dev/null
+++ b/math/test/mathbench_wrappers.h
@@ -0,0 +1,104 @@
+/*
+ * Function wrappers for mathbench.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#if WANT_VMATH
+#if __aarch64__
+
+#ifdef __vpcs
+/* One-argument adaptor for the "__vn_powf" benchmark entry: calls
+   __vn_powf with x passed as both arguments.  */
+__vpcs static v_float
+xy__vn_powf (v_float x)
+{
+ return __vn_powf (x, x);
+}
+
+/* One-argument adaptor for the "_ZGVnN4vv_powf" benchmark entry: calls
+   _ZGVnN4vv_powf with x passed as both arguments.  */
+__vpcs static v_float
+xy_Z_powf (v_float x)
+{
+ return _ZGVnN4vv_powf (x, x);
+}
+
+/* One-argument adaptor for the "__vn_pow" benchmark entry: calls
+   __vn_pow with x passed as both arguments.  */
+__vpcs static v_double
+xy__vn_pow (v_double x)
+{
+ return __vn_pow (x, x);
+}
+
+/* One-argument adaptor for the "_ZGVnN2vv_pow" benchmark entry: calls
+   _ZGVnN2vv_pow with x passed as both arguments.  */
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+ return _ZGVnN2vv_pow (x, x);
+}
+#endif // __vpcs
+
+/* One-argument adaptor for the "__v_powf" benchmark entry: calls
+   __v_powf with x passed as both arguments.  */
+static v_float
+xy__v_powf (v_float x)
+{
+ return __v_powf (x, x);
+}
+
+/* One-argument adaptor for the "__v_pow" benchmark entry: calls
+   __v_pow with x passed as both arguments.  */
+static v_double
+xy__v_pow (v_double x)
+{
+ return __v_pow (x, x);
+}
+#endif // __aarch64__
+
+/* One-argument adaptor for the "__s_powf" benchmark entry: calls
+   __s_powf with x passed as both arguments.  */
+static float
+xy__s_powf (float x)
+{
+ return __s_powf (x, x);
+}
+
+/* One-argument adaptor for the "__s_pow" benchmark entry: calls
+   __s_pow with x passed as both arguments.  */
+static double
+xy__s_pow (double x)
+{
+ return __s_pow (x, x);
+}
+#endif // WANT_VMATH
+
+/* One-argument adaptor for the "pow" benchmark entry: pow with x passed
+   as both arguments.  */
+static double
+xypow (double x)
+{
+ return pow (x, x);
+}
+
+/* One-argument adaptor for the "powf" benchmark entry: powf with x passed
+   as both arguments.  */
+static float
+xypowf (float x)
+{
+ return powf (x, x);
+}
+
+/* pow with a varying first argument and the second argument fixed at
+   23.4.  */
+static double
+xpow (double x)
+{
+ return pow (x, 23.4);
+}
+
+/* powf with a varying first argument and the second argument fixed at
+   23.4f.  */
+static float
+xpowf (float x)
+{
+ return powf (x, 23.4f);
+}
+
+/* pow with the first argument fixed at 2.34 and a varying second
+   argument.  */
+static double
+ypow (double x)
+{
+ return pow (2.34, x);
+}
+
+/* powf with the first argument fixed at 2.34f and a varying second
+   argument.  */
+static float
+ypowf (float x)
+{
+ return powf (2.34f, x);
+}
+
+/* Gives sincosf the float -> float shape used by the benchmark table:
+   both outputs are computed and their sum is returned, so neither result
+   goes unused.  */
+static float
+sincosf_wrap (float x)
+{
+  float sin_out;
+  float cos_out;
+
+  sincosf (x, &sin_out, &cos_out);
+  return sin_out + cos_out;
+}
diff --git a/math/test/mathtest.c b/math/test/mathtest.c
index 3108967..3168da4 100644
--- a/math/test/mathtest.c
+++ b/math/test/mathtest.c
@@ -1,8 +1,8 @@
/*
* mathtest.c - test rig for mathlib
*
- * Copyright (c) 1998-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 1998-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <assert.h>
@@ -196,9 +196,11 @@ int is_complex_rettype(int rettype) {
#define TFUNCARM(arg,ret,name,tolerance) { t_func, arg, ret, (void*)& ARM_PREFIX(name), m_none, tolerance, #name }
#define MFUNC(arg,ret,name,tolerance) { t_macro, arg, ret, NULL, m_##name, tolerance, #name }
+#ifndef PL
/* sincosf wrappers for easier testing. */
static float sincosf_sinf(float x) { float s,c; sincosf(x, &s, &c); return s; }
static float sincosf_cosf(float x) { float s,c; sincosf(x, &s, &c); return c; }
+#endif
test_func tfuncs[] = {
/* trigonometric */
@@ -218,9 +220,10 @@ test_func tfuncs[] = {
TFUNCARM(at_s,rt_s, tanf, 4*ULPUNIT),
TFUNCARM(at_s,rt_s, sinf, 3*ULPUNIT/4),
TFUNCARM(at_s,rt_s, cosf, 3*ULPUNIT/4),
+#ifndef PL
TFUNCARM(at_s,rt_s, sincosf_sinf, 3*ULPUNIT/4),
TFUNCARM(at_s,rt_s, sincosf_cosf, 3*ULPUNIT/4),
-
+#endif
/* hyperbolic */
TFUNC(at_d, rt_d, atanh, 4*ULPUNIT),
TFUNC(at_d, rt_d, asinh, 4*ULPUNIT),
diff --git a/math/test/rtest/dotest.c b/math/test/rtest/dotest.c
index 6be79e1..5b3e9b4 100644
--- a/math/test/rtest/dotest.c
+++ b/math/test/rtest/dotest.c
@@ -2,7 +2,7 @@
* dotest.c - actually generate mathlib test cases
*
* Copyright (c) 1999-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdio.h>
diff --git a/math/test/rtest/intern.h b/math/test/rtest/intern.h
index 12a9c74..3ebd7dd 100644
--- a/math/test/rtest/intern.h
+++ b/math/test/rtest/intern.h
@@ -2,7 +2,7 @@
* intern.h
*
* Copyright (c) 1999-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef mathtest_intern_h
diff --git a/math/test/rtest/main.c b/math/test/rtest/main.c
index 0d8ead8..3d533c9 100644
--- a/math/test/rtest/main.c
+++ b/math/test/rtest/main.c
@@ -2,7 +2,7 @@
* main.c
*
* Copyright (c) 1999-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <assert.h>
diff --git a/math/test/rtest/random.c b/math/test/rtest/random.c
index 5612396..1de3258 100644
--- a/math/test/rtest/random.c
+++ b/math/test/rtest/random.c
@@ -2,7 +2,7 @@
* random.c - random number generator for producing mathlib test cases
*
* Copyright (c) 1998-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "types.h"
diff --git a/math/test/rtest/random.h b/math/test/rtest/random.h
index b4b22df..0b477d7 100644
--- a/math/test/rtest/random.h
+++ b/math/test/rtest/random.h
@@ -2,7 +2,7 @@
* random.h - header for random.c
*
* Copyright (c) 2009-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "types.h"
diff --git a/math/test/rtest/semi.c b/math/test/rtest/semi.c
index c9f0daf..70a7844 100644
--- a/math/test/rtest/semi.c
+++ b/math/test/rtest/semi.c
@@ -2,7 +2,7 @@
* semi.c: test implementations of mathlib seminumerical functions
*
* Copyright (c) 1999-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdio.h>
diff --git a/math/test/rtest/semi.h b/math/test/rtest/semi.h
index 17dc415..7a1444e 100644
--- a/math/test/rtest/semi.h
+++ b/math/test/rtest/semi.h
@@ -2,7 +2,7 @@
* semi.h: header for semi.c
*
* Copyright (c) 1999-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef test_semi_h
diff --git a/math/test/rtest/types.h b/math/test/rtest/types.h
index 53cd557..e15b4e0 100644
--- a/math/test/rtest/types.h
+++ b/math/test/rtest/types.h
@@ -2,7 +2,7 @@
* types.h
*
* Copyright (c) 2005-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef mathtest_types_h
diff --git a/math/test/rtest/wrappers.c b/math/test/rtest/wrappers.c
index de45ac5..4410171 100644
--- a/math/test/rtest/wrappers.c
+++ b/math/test/rtest/wrappers.c
@@ -2,7 +2,7 @@
* wrappers.c - wrappers to modify output of MPFR/MPC test functions
*
* Copyright (c) 2014-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <assert.h>
diff --git a/math/test/rtest/wrappers.h b/math/test/rtest/wrappers.h
index 7b09c85..0a8a587 100644
--- a/math/test/rtest/wrappers.h
+++ b/math/test/rtest/wrappers.h
@@ -2,7 +2,7 @@
* wrappers.h - wrappers to modify output of MPFR/MPC test functions
*
* Copyright (c) 2014-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
typedef struct {
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index 0190d9a..b4000f6 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -2,8 +2,8 @@
# ULP error check script.
#
-# Copyright (c) 2019-2020, Arm Limited.
-# SPDX-License-Identifier: MIT
+# Copyright (c) 2019-2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
#set -x
set -eu
@@ -145,7 +145,7 @@ done
# vector functions
Ldir=0.5
r='n'
-flags="${ULPFLAGS:--q} -f"
+flags="${ULPFLAGS:--q}"
runs=
check __s_exp 1 && runs=1
runv=
@@ -229,7 +229,7 @@ L_sinf=1.4
L_cosf=1.4
L_powf=2.1
-while read G F R
+while read G F R D
do
[ "$R" = 1 ] || continue
case "$G" in \#*) continue ;; esac
@@ -239,7 +239,16 @@ do
do
[ -n "$X" ] || continue
case "$X" in \#*) continue ;; esac
- t $F $X
+		# Disable fenv checking in the ulp tool when the library was
+		# built with SIMD exceptions disabled (WANT_SIMD_EXCEPT unset,
+		# empty or 0).  Otherwise fenv checking may still be disabled
+		# for a single routine by adding -f to the end of its run line
+		# (read into $D).
+		# ${WANT_SIMD_EXCEPT:-0} is needed because this script runs
+		# under `set -u`: expanding the bare variable while it is unset
+		# would abort the whole run.
+		disable_fenv=""
+		if [ "${WANT_SIMD_EXCEPT:-0}" -eq 0 ]; then
+			disable_fenv="-f"
+		fi
+		t $D $disable_fenv $F $X
done << EOF
$range
EOF
@@ -255,10 +264,10 @@ log __v_log $runv
log __vn_log $runvn
log _ZGVnN2v_log $runvn
-pow __s_pow $runs
-pow __v_pow $runv
-pow __vn_pow $runvn
-pow _ZGVnN2vv_pow $runvn
+pow __s_pow $runs -f
+pow __v_pow $runv -f
+pow __vn_pow $runvn -f
+pow _ZGVnN2vv_pow $runvn -f
sin __s_sin $runs
sin __v_sin $runv
@@ -275,18 +284,18 @@ expf __v_expf $runv
expf __vn_expf $runvn
expf _ZGVnN4v_expf $runvn
-expf_1u __s_expf_1u $runs
-expf_1u __v_expf_1u $runv
-expf_1u __vn_expf_1u $runvn
+expf_1u __s_expf_1u $runs -f
+expf_1u __v_expf_1u $runv -f
+expf_1u __vn_expf_1u $runvn -f
exp2f __s_exp2f $runs
exp2f __v_exp2f $runv
exp2f __vn_exp2f $runvn
exp2f _ZGVnN4v_exp2f $runvn
-exp2f_1u __s_exp2f_1u $runs
-exp2f_1u __v_exp2f_1u $runv
-exp2f_1u __vn_exp2f_1u $runvn
+exp2f_1u __s_exp2f_1u $runs -f
+exp2f_1u __v_exp2f_1u $runv -f
+exp2f_1u __vn_exp2f_1u $runvn -f
logf __s_logf $runs
logf __v_logf $runv
@@ -303,10 +312,10 @@ cosf __v_cosf $runv
cosf __vn_cosf $runvn
cosf _ZGVnN4v_cosf $runvn
-powf __s_powf $runs
-powf __v_powf $runv
-powf __vn_powf $runvn
-powf _ZGVnN4vv_powf $runvn
+powf __s_powf $runs -f
+powf __v_powf $runv -f
+powf __vn_powf $runvn -f
+powf _ZGVnN4vv_powf $runvn -f
EOF
[ 0 -eq $FAIL ] || {
diff --git a/math/test/testcases/directed/cosf.tst b/math/test/testcases/directed/cosf.tst
index 7916044..7ea0d45 100644
--- a/math/test/testcases/directed/cosf.tst
+++ b/math/test/testcases/directed/cosf.tst
@@ -1,7 +1,7 @@
; cosf.tst - Directed test cases for SP cosine
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=cosf op1=7fc00001 result=7fc00001 errno=0
func=cosf op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/erf.tst b/math/test/testcases/directed/erf.tst
index 7fa4d18..12384ce 100644
--- a/math/test/testcases/directed/erf.tst
+++ b/math/test/testcases/directed/erf.tst
@@ -1,7 +1,7 @@
; erf.tst - Directed test cases for erf
;
; Copyright (c) 2007-2020, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/math/test/testcases/directed/erff.tst b/math/test/testcases/directed/erff.tst
index d05b7b1..28f8fa3 100644
--- a/math/test/testcases/directed/erff.tst
+++ b/math/test/testcases/directed/erff.tst
@@ -1,7 +1,7 @@
; erff.tst
;
; Copyright (c) 2007-2020, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=erff op1=7fc00001 result=7fc00001 errno=0
func=erff op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/exp.tst b/math/test/testcases/directed/exp.tst
index 85d556c..0bb2ef4 100644
--- a/math/test/testcases/directed/exp.tst
+++ b/math/test/testcases/directed/exp.tst
@@ -1,7 +1,7 @@
; Directed test cases for exp
;
; Copyright (c) 2018-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/math/test/testcases/directed/exp2.tst b/math/test/testcases/directed/exp2.tst
index fa56c9f..7069f90 100644
--- a/math/test/testcases/directed/exp2.tst
+++ b/math/test/testcases/directed/exp2.tst
@@ -1,7 +1,7 @@
; Directed test cases for exp2
;
; Copyright (c) 2018-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/math/test/testcases/directed/exp2f.tst b/math/test/testcases/directed/exp2f.tst
index 38cfc3f..6ca2eea 100644
--- a/math/test/testcases/directed/exp2f.tst
+++ b/math/test/testcases/directed/exp2f.tst
@@ -1,7 +1,7 @@
; exp2f.tst - Directed test cases for exp2f
;
; Copyright (c) 2017-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=exp2f op1=7fc00001 result=7fc00001 errno=0
func=exp2f op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/expf.tst b/math/test/testcases/directed/expf.tst
index ff0f671..89ae8fe 100644
--- a/math/test/testcases/directed/expf.tst
+++ b/math/test/testcases/directed/expf.tst
@@ -1,7 +1,7 @@
; expf.tst - Directed test cases for expf
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=expf op1=7fc00001 result=7fc00001 errno=0
func=expf op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/log.tst b/math/test/testcases/directed/log.tst
index a0aa398..686ea83 100644
--- a/math/test/testcases/directed/log.tst
+++ b/math/test/testcases/directed/log.tst
@@ -1,7 +1,7 @@
; Directed test cases for log
;
; Copyright (c) 2018-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/math/test/testcases/directed/log2.tst b/math/test/testcases/directed/log2.tst
index ff1286c..361bdde 100644
--- a/math/test/testcases/directed/log2.tst
+++ b/math/test/testcases/directed/log2.tst
@@ -1,7 +1,7 @@
; Directed test cases for log2
;
; Copyright (c) 2018-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
diff --git a/math/test/testcases/directed/log2f.tst b/math/test/testcases/directed/log2f.tst
index 5832c4f..5fce051 100644
--- a/math/test/testcases/directed/log2f.tst
+++ b/math/test/testcases/directed/log2f.tst
@@ -1,7 +1,7 @@
; log2f.tst - Directed test cases for log2f
;
; Copyright (c) 2017-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=log2f op1=7fc00001 result=7fc00001 errno=0
func=log2f op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/logf.tst b/math/test/testcases/directed/logf.tst
index 6e68a36..a6d1b9d 100644
--- a/math/test/testcases/directed/logf.tst
+++ b/math/test/testcases/directed/logf.tst
@@ -1,7 +1,7 @@
; logf.tst - Directed test cases for logf
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=logf op1=7fc00001 result=7fc00001 errno=0
func=logf op1=ffc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/pow.tst b/math/test/testcases/directed/pow.tst
index 1966581..879d128 100644
--- a/math/test/testcases/directed/pow.tst
+++ b/math/test/testcases/directed/pow.tst
@@ -1,7 +1,7 @@
; Directed test cases for pow
;
; Copyright (c) 2018-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=pow op1=00000000.00000000 op2=00000000.00000000 result=3ff00000.00000000 errno=0
func=pow op1=00000000.00000000 op2=00000000.00000001 result=00000000.00000000 errno=0
diff --git a/math/test/testcases/directed/powf.tst b/math/test/testcases/directed/powf.tst
index 3fa8b11..46d5224 100644
--- a/math/test/testcases/directed/powf.tst
+++ b/math/test/testcases/directed/powf.tst
@@ -1,7 +1,7 @@
; powf.tst - Directed test cases for powf
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
diff --git a/math/test/testcases/directed/sincosf.tst b/math/test/testcases/directed/sincosf.tst
index 4b33d22..cddb346 100644
--- a/math/test/testcases/directed/sincosf.tst
+++ b/math/test/testcases/directed/sincosf.tst
@@ -1,7 +1,7 @@
; Directed test cases for SP sincos
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/directed/sinf.tst b/math/test/testcases/directed/sinf.tst
index ded80b1..041b13d 100644
--- a/math/test/testcases/directed/sinf.tst
+++ b/math/test/testcases/directed/sinf.tst
@@ -1,7 +1,7 @@
; sinf.tst - Directed test cases for SP sine
;
; Copyright (c) 2007-2019, Arm Limited.
-; SPDX-License-Identifier: MIT
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
func=sinf op1=7fc00001 result=7fc00001 errno=0
diff --git a/math/test/testcases/random/double.tst b/math/test/testcases/random/double.tst
index c24ff80..8e885d6 100644
--- a/math/test/testcases/random/double.tst
+++ b/math/test/testcases/random/double.tst
@@ -1,7 +1,7 @@
!! double.tst - Random test case specification for DP functions
!!
!! Copyright (c) 1999-2019, Arm Limited.
-!! SPDX-License-Identifier: MIT
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
test exp 10000
test exp2 10000
diff --git a/math/test/testcases/random/float.tst b/math/test/testcases/random/float.tst
index d02a227..ea4a5a0 100644
--- a/math/test/testcases/random/float.tst
+++ b/math/test/testcases/random/float.tst
@@ -1,7 +1,7 @@
!! single.tst - Random test case specification for SP functions
!!
!! Copyright (c) 1999-2019, Arm Limited.
-!! SPDX-License-Identifier: MIT
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
test sinf 10000
test cosf 10000
diff --git a/math/test/ulp.c b/math/test/ulp.c
index 51479b8..bb8c3ad 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -1,8 +1,8 @@
/*
* ULP error checking tool for math functions.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <ctype.h>
@@ -214,16 +214,6 @@ struct conf
double errlim;
};
-/* Wrappers for sincos. */
-static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
-static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
-static double sincos_sin(double x) {(void)cos(x); return sin(x);}
-static double sincos_cos(double x) {(void)sin(x); return cos(x);}
-#if USE_MPFR
-static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
-static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
-#endif
-
/* A bit of a hack: call vector functions twice with the same
input in lane 0 but a different value in other lanes: once
with an in-range value and then with a special case value. */
@@ -233,50 +223,79 @@ static int secondcall;
#if __aarch64__ && WANT_VMATH
typedef __f32x4_t v_float;
typedef __f64x2_t v_double;
-static const float fv[2] = {1.0f, -INFINITY};
-static const double dv[2] = {1.0, -INFINITY};
+/* First element of fv and dv may be changed by -c argument. */
+static float fv[2] = {1.0f, -INFINITY};
+static double dv[2] = {1.0, -INFINITY};
static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; }
static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; }
+#if WANT_SVE_MATH
+#include <arm_sve.h>
+typedef __SVFloat32_t sv_float;
+typedef __SVFloat64_t sv_double;
+
+/* Build an SVE float vector holding x in every lane except the last one,
+   which holds fv[secondcall].  */
+static inline sv_float svargf(float x) {
+  const int lanes = svcntw();
+  const int last = lanes - 1;
+  float buf[lanes];
+  for (int lane = 0; lane < last; lane++)
+    buf[lane] = x;
+  buf[last] = fv[secondcall];
+  return svld1(svptrue_b32(), buf);
+}
+/* Build an SVE double vector holding x in every lane except the last one,
+   which holds dv[secondcall].  */
+static inline sv_double svargd(double x) {
+  const int lanes = svcntd();
+  const int last = lanes - 1;
+  double buf[lanes];
+  for (int lane = 0; lane < last; lane++)
+    buf[lane] = x;
+  buf[last] = dv[secondcall];
+  return svld1(svptrue_b64(), buf);
+}
+/* Return lane 0 of an SVE float vector by storing the whole vector to a
+   temporary buffer.  */
+static inline float svretf(sv_float vec) {
+  const int lanes = svcntw();
+  float out[lanes];
+  svst1(svptrue_b32(), out, vec);
+  return *out;
+}
+/* Return lane 0 of an SVE double vector by storing the whole vector to a
+   temporary buffer.  */
+static inline double svretd(sv_double vec) {
+  const int lanes = svcntd();
+  double out[lanes];
+  svst1(svptrue_b64(), out, vec);
+  return *out;
+}
+#endif
+#endif
+
+#if WANT_SVE_MATH
+/* Identity function on long double.  The SVD1 (dummy) table entry names
+   it through x##l, as the third argument of F.  */
+long double
+dummyl (long double x)
+{
+ return x;
+}
+
+/* Identity function on double.  The SVF1 (dummy) table entry names it as
+   the third argument of F.  */
+double
+dummy (double x)
+{
+ return x;
+}
+
+/* Identity routine on an SVE double vector, called by the sv_dummy
+   wrapper.
+   NOTE(review): sv_double is typedef'd only inside the
+   `__aarch64__ && WANT_VMATH` block, while this definition is guarded by
+   WANT_SVE_MATH alone -- confirm the two options are always set
+   together.  */
+static sv_double
+__sv_dummy (sv_double x)
+{
+ return x;
+}
-static float v_sinf(float x) { return __v_sinf(argf(x))[0]; }
-static float v_cosf(float x) { return __v_cosf(argf(x))[0]; }
-static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; }
-static float v_expf(float x) { return __v_expf(argf(x))[0]; }
-static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; }
-static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; }
-static float v_logf(float x) { return __v_logf(argf(x))[0]; }
-static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; }
-static double v_sin(double x) { return __v_sin(argd(x))[0]; }
-static double v_cos(double x) { return __v_cos(argd(x))[0]; }
-static double v_exp(double x) { return __v_exp(argd(x))[0]; }
-static double v_log(double x) { return __v_log(argd(x))[0]; }
-static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
-#ifdef __vpcs
-static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
-static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
-static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; }
-static float vn_expf(float x) { return __vn_expf(argf(x))[0]; }
-static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; }
-static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; }
-static float vn_logf(float x) { return __vn_logf(argf(x))[0]; }
-static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; }
-static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
-static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
-static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
-static double vn_log(double x) { return __vn_log(argd(x))[0]; }
-static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
-static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
-static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
-static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
-static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; }
-static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; }
-static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; }
-static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
-static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
-static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
-static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
-static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
+/* Identity routine on an SVE float vector, called by the sv_dummyf
+   wrapper.  */
+static sv_float
+__sv_dummyf (sv_float x)
+{
+ return x;
+}
#endif
+
+#include "test/ulp_wrappers.h"
+
+/* Wrappers for SVE functions. */
+#if WANT_SVE_MATH
+/* Scalar adaptors: build a vector argument with svargd/svargf, call the
+   SVE dummy routine, and return lane 0 of its result via svretd/svretf.  */
+static double sv_dummy (double x) { return svretd (__sv_dummy (svargd (x))); }
+static float sv_dummyf (float x) { return svretf (__sv_dummyf (svargf (x))); }
#endif
struct fun
@@ -322,83 +341,53 @@ static const struct fun fun[] = {
#define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
#define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
#define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
- F1 (sin)
- F1 (cos)
- F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
- F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
- F1 (exp)
- F1 (exp2)
- F1 (log)
- F1 (log2)
- F2 (pow)
- F1 (erf)
- D1 (exp)
- D1 (exp2)
- D1 (log)
- D1 (log2)
- D2 (pow)
- D1 (erf)
-#if WANT_VMATH
- F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
- F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
- F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0)
- F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
- F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0)
- F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0)
- F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
- F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
- F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
- F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
- F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
- F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
- F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
-#if __aarch64__
- F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
- F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
- F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
- F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
- F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
- F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
- F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
- F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
- F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
- F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
- F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
-#ifdef __vpcs
- F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
- F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
- F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
- F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
- F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
- F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
- F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
- F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
- F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
- F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
- F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
- F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
- F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
- F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
- F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
- F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
- F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
- F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
- F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
- F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
- F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
- F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
-#endif
-#endif
+/* Shorthands for table entries.  Each one expands to a single
+   F (name, wrapper, third, mpfr_fn, N, S, T, 0) entry, where N is 1 or 2
+   (paired with T = f1/d1 or f2/d2) and S is 1 for the float ("...f")
+   forms and 0 for the double forms.  The Z*VN* forms expand to two
+   entries: the __vn_ one followed by the _ZGVnN one.
+   NOTE(review): the meaning of each F argument is fixed by the definition
+   of F, which is outside this hunk.  The explicit __v_, __vn_ and _ZGVnN
+   entries removed from this file passed 1 as the last F argument, whereas
+   these shorthands pass 0 -- confirm that this is intended.  */
+/* Neon routines. */
+#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZVNF1(x) VNF1 (x) ZVF1 (x)
+#define ZVNF2(x) VNF2 (x) ZVF2 (x)
+#define ZVND1(x) VND1 (x) ZVD1 (x)
+#define ZVND2(x) VND2 (x) ZVD2 (x)
+/* Scalar (__s_) routines: the routine itself is also used as the
+   wrapper.  */
+#define SF1(x) F (__s_##x##f, __s_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define SF2(x) F (__s_##x##f, __s_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define SD1(x) F (__s_##x, __s_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define SD2(x) F (__s_##x, __s_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+/* SVE routines. */
+#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+
+#include "test/ulp_funcs.h"
+
+#if WANT_SVE_MATH
+ SVD1 (dummy)
+ SVF1 (dummy)
#endif
+
#undef F
#undef F1
#undef F2
#undef D1
#undef D2
+#undef SVF1
+#undef SVF2
+#undef SVD1
+#undef SVD2
{0}};
/* Boilerplate for generic calls. */
@@ -645,6 +634,11 @@ usage (void)
puts ("-q: quiet.");
puts ("-m: use mpfr even if faster method is available.");
puts ("-f: disable fenv testing (rounding modes and exceptions).");
+#if __aarch64__ && WANT_VMATH
+ puts ("-c: neutral 'control value' to test behaviour when one lane can affect another. \n"
+ " This should be different from tested input in other lanes, and non-special \n"
+ " (i.e. should not trigger fenv exceptions). Default is 1.");
+#endif
puts ("Supported func:");
for (const struct fun *f = fun; f->name; f++)
printf ("\t%s\n", f->name);
@@ -812,6 +806,18 @@ main (int argc, char *argv[])
conf.rc = argv[0][0];
}
break;
+#if __aarch64__ && WANT_VMATH
+ case 'c':
+ argc--;
+ argv++;
+ /* With -c as the last argument argv[0] is now the terminating NULL of argv;
+ bail out via usage () (presumably non-returning - confirm) before parsing. */
+ if (argc < 1)
+ usage ();
+ fv[0] = strtof (argv[0], 0);
+ dv[0] = strtod (argv[0], 0);
+ break;
+#endif
default:
usage ();
}
diff --git a/math/test/ulp.h b/math/test/ulp.h
index a0c3016..327b4bd 100644
--- a/math/test/ulp.h
+++ b/math/test/ulp.h
@@ -2,7 +2,7 @@
* Generic functions for ULP error estimation.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* For each different math function type,
diff --git a/math/test/ulp_funcs.h b/math/test/ulp_funcs.h
new file mode 100644
index 0000000..f5cea4d
--- /dev/null
+++ b/math/test/ulp_funcs.h
@@ -0,0 +1,78 @@
+/*
+ * Function entries for ulp.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+ F1 (sin)
+ F1 (cos)
+ F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
+ F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
+ F1 (exp)
+ F1 (exp2)
+ F1 (log)
+ F1 (log2)
+ F2 (pow)
+ F1 (erf)
+ D1 (exp)
+ D1 (exp2)
+ D1 (log)
+ D1 (log2)
+ D2 (pow)
+ D1 (erf)
+#if WANT_VMATH
+ F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
+ F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
+ F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0)
+ F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
+ F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0)
+ F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0)
+ F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
+ F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
+ F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+ F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+ F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
+ F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
+ F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
+#if __aarch64__
+ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
+#ifdef __vpcs
+ F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
+ F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
+#endif
+#endif
+#endif
diff --git a/math/test/ulp_wrappers.h b/math/test/ulp_wrappers.h
new file mode 100644
index 0000000..fd9e00c
--- /dev/null
+++ b/math/test/ulp_wrappers.h
@@ -0,0 +1,59 @@
+/*
+ * Function wrappers for ulp.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Wrappers for sincos. */
+static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
+static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
+static double sincos_sin(double x) {(void)cos(x); return sin(x);}
+static double sincos_cos(double x) {(void)sin(x); return cos(x);}
+#if USE_MPFR
+static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
+static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
+#endif
+
+/* Wrappers for vector functions. */
+#if __aarch64__ && WANT_VMATH
+static float v_sinf(float x) { return __v_sinf(argf(x))[0]; }
+static float v_cosf(float x) { return __v_cosf(argf(x))[0]; }
+static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; }
+static float v_expf(float x) { return __v_expf(argf(x))[0]; }
+static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; }
+static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; }
+static float v_logf(float x) { return __v_logf(argf(x))[0]; }
+static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; }
+static double v_sin(double x) { return __v_sin(argd(x))[0]; }
+static double v_cos(double x) { return __v_cos(argd(x))[0]; }
+static double v_exp(double x) { return __v_exp(argd(x))[0]; }
+static double v_log(double x) { return __v_log(argd(x))[0]; }
+static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
+#ifdef __vpcs
+static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
+static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
+static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; }
+static float vn_expf(float x) { return __vn_expf(argf(x))[0]; }
+static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; }
+static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; }
+static float vn_logf(float x) { return __vn_logf(argf(x))[0]; }
+static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; }
+static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
+static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
+static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
+static double vn_log(double x) { return __vn_log(argd(x))[0]; }
+static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
+static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
+static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
+static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
+static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; }
+static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; }
+static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; }
+static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
+static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
+static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
+static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
+static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
+#endif
+#endif
diff --git a/math/tools/cos.sollya b/math/tools/cos.sollya
index bd72d6b..6690adf 100644
--- a/math/tools/cos.sollya
+++ b/math/tools/cos.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating cos(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 8; // polynomial degree
a = -pi/4; // interval
diff --git a/math/tools/exp.sollya b/math/tools/exp.sollya
index b7a462c..0668bdb 100644
--- a/math/tools/exp.sollya
+++ b/math/tools/exp.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 5; // poly degree
N = 128; // table entries
diff --git a/math/tools/exp2.sollya b/math/tools/exp2.sollya
index e760769..bd0a42d 100644
--- a/math/tools/exp2.sollya
+++ b/math/tools/exp2.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating 2^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
// exp2f parameters
deg = 3; // poly degree
diff --git a/math/tools/log.sollya b/math/tools/log.sollya
index 6df4db4..5288f55 100644
--- a/math/tools/log.sollya
+++ b/math/tools/log.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 12; // poly degree
// |log(1+x)| > 0x1p-4 outside the interval
diff --git a/math/tools/log2.sollya b/math/tools/log2.sollya
index 4a364c0..85811be 100644
--- a/math/tools/log2.sollya
+++ b/math/tools/log2.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 11; // poly degree
// |log2(1+x)| > 0x1p-4 outside the interval
diff --git a/math/tools/log2_abs.sollya b/math/tools/log2_abs.sollya
index 82c4dac..d018ba0 100644
--- a/math/tools/log2_abs.sollya
+++ b/math/tools/log2_abs.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 7; // poly degree
// interval ~= 1/(2*N), where N is the table entries
diff --git a/math/tools/log_abs.sollya b/math/tools/log_abs.sollya
index a2ac190..5f9bfe4 100644
--- a/math/tools/log_abs.sollya
+++ b/math/tools/log_abs.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 6; // poly degree
// interval ~= 1/(2*N), where N is the table entries
diff --git a/math/tools/plot.py b/math/tools/plot.py
index 6c8b89f..a0fa023 100755
--- a/math/tools/plot.py
+++ b/math/tools/plot.py
@@ -3,7 +3,7 @@
# ULP error plot tool.
#
# Copyright (c) 2019, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
import numpy as np
import matplotlib.pyplot as plt
diff --git a/math/tools/remez.jl b/math/tools/remez.jl
index 2ff436f..1deab67 100755
--- a/math/tools/remez.jl
+++ b/math/tools/remez.jl
@@ -4,7 +4,7 @@
# remez.jl - implementation of the Remez algorithm for polynomial approximation
#
# Copyright (c) 2015-2019, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
import Base.\
diff --git a/math/tools/sin.sollya b/math/tools/sin.sollya
index a6e8511..a193000 100644
--- a/math/tools/sin.sollya
+++ b/math/tools/sin.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 7; // polynomial degree
a = -pi/4; // interval
diff --git a/math/tools/v_exp.sollya b/math/tools/v_exp.sollya
index c0abb63..5fa7de7 100644
--- a/math/tools/v_exp.sollya
+++ b/math/tools/v_exp.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 4; // poly degree
N = 128; // table entries
diff --git a/math/tools/v_log.sollya b/math/tools/v_log.sollya
index cc3d2c4..d982524 100644
--- a/math/tools/v_log.sollya
+++ b/math/tools/v_log.sollya
@@ -1,7 +1,7 @@
// polynomial used for __v_log(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 6; // poly degree
a = -0x1.fc1p-9;
diff --git a/math/tools/v_sin.sollya b/math/tools/v_sin.sollya
index 65cc995..63b9d65 100644
--- a/math/tools/v_sin.sollya
+++ b/math/tools/v_sin.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 15; // polynomial degree
a = -pi/2; // interval
diff --git a/math/v_cos.c b/math/v_cos.c
index 20ba6bd..4c8787e 100644
--- a/math/v_cos.c
+++ b/math/v_cos.c
@@ -1,8 +1,8 @@
/*
* Double-precision vector cos function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -55,6 +55,14 @@ V_NAME(cos) (v_f64_t x)
r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (cmp)))
+ /* If fenv exceptions are to be triggered correctly, set any special lanes
+ to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+ specialcase later. */
+ r = v_sel_f64 (cmp, v_f64 (1.0), r);
+#endif
+
/* n = rint((|x|+pi/2)/pi) - 0.5. */
n = v_fma_f64 (InvPi, r + HalfPi, Shift);
odd = v_as_u64_f64 (n) << 63;
diff --git a/math/v_cosf.c b/math/v_cosf.c
index 150294b..bd677c3 100644
--- a/math/v_cosf.c
+++ b/math/v_cosf.c
@@ -1,8 +1,8 @@
/*
* Single-precision vector cos function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -47,6 +47,14 @@ V_NAME(cosf) (v_f32_t x)
r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (cmp)))
+ /* If fenv exceptions are to be triggered correctly, set any special lanes
+ to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+ specialcase later. */
+ r = v_sel_f32 (cmp, v_f32 (1.0f), r);
+#endif
+
/* n = rint((|x|+pi/2)/pi) - 0.5 */
n = v_fma_f32 (InvPi, r + HalfPi, Shift);
odd = v_as_u32_f32 (n) << 31;
diff --git a/math/v_exp.c b/math/v_exp.c
index e459d53..da23fd1 100644
--- a/math/v_exp.c
+++ b/math/v_exp.c
@@ -1,8 +1,8 @@
/*
* Double-precision vector e^x function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -36,6 +36,22 @@
#define Tab __v_exp_data
#define IndexMask v_u64 (N - 1)
#define Shift v_f64 (0x1.8p+52)
+
+#if WANT_SIMD_EXCEPT
+
+#define TinyBound 0x200 /* top12 (asuint64 (0x1p-511)). */
+#define BigBound 0x408 /* top12 (asuint64 (0x1p9)). */
+
+VPCS_ATTR static NOINLINE v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+ /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+ routine for special lanes. */
+ return v_call_f64 (exp, x, y, cmp);
+}
+
+#else
+
#define Thres v_f64 (704.0)
VPCS_ATTR
@@ -54,6 +70,8 @@ specialcase (v_f64_t s, v_f64_t y, v_f64_t n)
return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0)));
}
+#endif
+
VPCS_ATTR
v_f64_t
V_NAME(exp) (v_f64_t x)
@@ -61,7 +79,18 @@ V_NAME(exp) (v_f64_t x)
v_f64_t n, r, r2, s, y, z;
v_u64_t cmp, u, e, i;
+#if WANT_SIMD_EXCEPT
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ specialcase to fix special lanes later. This is only necessary if fenv
+ exceptions are to be triggered correctly. */
+ v_f64_t xm = x;
+ cmp = v_cond_u64 ((v_as_u64_f64 (v_abs_f64 (x)) >> 52) - TinyBound
+ >= BigBound - TinyBound);
+ if (unlikely (v_any_u64 (cmp)))
+ x = v_sel_f64 (cmp, v_f64 (1), x);
+#else
cmp = v_cond_u64 (v_abs_f64 (x) > Thres);
+#endif
/* n = round(x/(ln2/N)). */
z = v_fma_f64 (x, InvLn2, Shift);
@@ -87,7 +116,12 @@ V_NAME(exp) (v_f64_t x)
s = v_as_f64_u64 (u + e);
if (unlikely (v_any_u64 (cmp)))
+#if WANT_SIMD_EXCEPT
+ return specialcase (xm, v_fma_f64 (y, s, s), cmp);
+#else
return specialcase (s, y, n);
+#endif
+
return v_fma_f64 (y, s, s);
}
VPCS_ALIAS
diff --git a/math/v_exp.h b/math/v_exp.h
index 305da19..1e7f7f3 100644
--- a/math/v_exp.h
+++ b/math/v_exp.h
@@ -2,7 +2,7 @@
* Declarations for double-precision e^x vector function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "v_math.h"
diff --git a/math/v_exp2f.c b/math/v_exp2f.c
index e3ea5af..7f40dba 100644
--- a/math/v_exp2f.c
+++ b/math/v_exp2f.c
@@ -1,8 +1,8 @@
/*
* Single-precision vector 2^x function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -25,6 +25,22 @@ static const float Poly[] = {
#define Shift v_f32 (0x1.8p23f)
+#if WANT_SIMD_EXCEPT
+
+#define TinyBound 0x20000000 /* asuint (0x1p-63). */
+#define BigBound 0x42800000 /* asuint (0x1p6). */
+
+VPCS_ATTR
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+ /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+ routine for special lanes. */
+ return v_call_f32 (exp2f, x, y, cmp);
+}
+
+#else
+
VPCS_ATTR
static v_f32_t
specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale)
@@ -41,15 +57,28 @@ specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f
return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0));
}
+#endif
+
VPCS_ATTR
v_f32_t
V_NAME(exp2f) (v_f32_t x)
{
- v_f32_t n, r, r2, scale, p, q, poly, absn;
+ v_f32_t n, r, r2, scale, p, q, poly;
v_u32_t cmp, e;
- /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
- x = n + r, with r in [-1/2, 1/2]. */
+#if WANT_SIMD_EXCEPT
+ cmp = v_cond_u32 ((v_as_u32_f32 (x) & 0x7fffffff) - TinyBound
+ >= BigBound - TinyBound);
+ v_f32_t xm = x;
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ specialcase to fix special lanes later. This is only necessary if fenv
+ exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u32 (cmp)))
+ x = v_sel_f32 (cmp, v_f32 (1), x);
+#endif
+
+ /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = n + r, with r in [-1/2, 1/2]. */
#if 0
v_f32_t z;
z = x + Shift;
@@ -62,16 +91,26 @@ V_NAME(exp2f) (v_f32_t x)
e = v_as_u32_s32 (v_round_s32 (x)) << 23;
#endif
scale = v_as_f32_u32 (e + v_u32 (0x3f800000));
- absn = v_abs_f32 (n);
+
+#if !WANT_SIMD_EXCEPT
+ v_f32_t absn = v_abs_f32 (n);
cmp = v_cond_u32 (absn > v_f32 (126.0f));
+#endif
+
r2 = r * r;
p = v_fma_f32 (C0, r, C1);
q = v_fma_f32 (C2, r, C3);
q = v_fma_f32 (p, r2, q);
p = C4 * r;
poly = v_fma_f32 (q, r2, p);
+
if (unlikely (v_any_u32 (cmp)))
+#if WANT_SIMD_EXCEPT
+ return specialcase (xm, v_fma_f32 (poly, scale, scale), cmp);
+#else
return specialcase (poly, n, e, absn, cmp, scale);
+#endif
+
return v_fma_f32 (poly, scale, scale);
}
VPCS_ALIAS
diff --git a/math/v_exp2f_1u.c b/math/v_exp2f_1u.c
index 1caa14d..de1a32d 100644
--- a/math/v_exp2f_1u.c
+++ b/math/v_exp2f_1u.c
@@ -2,7 +2,7 @@
* Single-precision vector 2^x function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_exp_data.c b/math/v_exp_data.c
index 3653554..30421da 100644
--- a/math/v_exp_data.c
+++ b/math/v_exp_data.c
@@ -2,7 +2,7 @@
* Lookup table for double-precision e^x vector function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "v_exp.h"
diff --git a/math/v_expf.c b/math/v_expf.c
index d403e00..ade23b2 100644
--- a/math/v_expf.c
+++ b/math/v_expf.c
@@ -1,8 +1,8 @@
/*
* Single-precision vector e^x function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -28,6 +28,22 @@ static const float Poly[] = {
#define Ln2hi v_f32 (0x1.62e4p-1f)
#define Ln2lo v_f32 (0x1.7f7d1cp-20f)
+#if WANT_SIMD_EXCEPT
+
+#define TinyBound 0x20000000 /* asuint (0x1p-63). */
+#define BigBound 0x42800000 /* asuint (0x1p6). */
+
+VPCS_ATTR
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+ /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+ routine for special lanes. */
+ return v_call_f32 (expf, x, y, cmp);
+}
+
+#else
+
VPCS_ATTR
static v_f32_t
specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale)
@@ -44,15 +60,28 @@ specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f
return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0));
}
+#endif
+
VPCS_ATTR
v_f32_t
V_NAME(expf) (v_f32_t x)
{
- v_f32_t n, r, r2, scale, p, q, poly, absn, z;
+ v_f32_t n, r, r2, scale, p, q, poly, z;
v_u32_t cmp, e;
- /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
- x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+#if WANT_SIMD_EXCEPT
+ cmp = v_cond_u32 ((v_as_u32_f32 (x) & 0x7fffffff) - TinyBound
+ >= BigBound - TinyBound);
+ v_f32_t xm = x;
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ specialcase to fix special lanes later. This is only necessary if fenv
+ exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u32 (cmp)))
+ x = v_sel_f32 (cmp, v_f32 (1), x);
+#endif
+
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
#if 1
z = v_fma_f32 (x, InvLn2, Shift);
n = z - Shift;
@@ -67,16 +96,26 @@ V_NAME(expf) (v_f32_t x)
e = v_as_u32_s32 (v_round_s32 (z)) << 23;
#endif
scale = v_as_f32_u32 (e + v_u32 (0x3f800000));
- absn = v_abs_f32 (n);
+
+#if !WANT_SIMD_EXCEPT
+ v_f32_t absn = v_abs_f32 (n);
cmp = v_cond_u32 (absn > v_f32 (126.0f));
+#endif
+
r2 = r * r;
p = v_fma_f32 (C0, r, C1);
q = v_fma_f32 (C2, r, C3);
q = v_fma_f32 (p, r2, q);
p = C4 * r;
poly = v_fma_f32 (q, r2, p);
+
if (unlikely (v_any_u32 (cmp)))
+#if WANT_SIMD_EXCEPT
+ return specialcase (xm, v_fma_f32 (poly, scale, scale), cmp);
+#else
return specialcase (poly, n, e, absn, cmp, scale);
+#endif
+
return v_fma_f32 (poly, scale, scale);
}
VPCS_ALIAS
diff --git a/math/v_expf_1u.c b/math/v_expf_1u.c
index 023bd24..8f0ae91 100644
--- a/math/v_expf_1u.c
+++ b/math/v_expf_1u.c
@@ -2,7 +2,7 @@
* Single-precision vector e^x function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_log.c b/math/v_log.c
index d84c740..47a8291 100644
--- a/math/v_log.c
+++ b/math/v_log.c
@@ -2,7 +2,7 @@
* Double-precision vector log(x) function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_log.h b/math/v_log.h
index bcc2fa6..a37bbc2 100644
--- a/math/v_log.h
+++ b/math/v_log.h
@@ -2,7 +2,7 @@
* Declarations for double-precision log(x) vector function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "v_math.h"
diff --git a/math/v_log_data.c b/math/v_log_data.c
index 97ee5b0..ec1c8e5 100644
--- a/math/v_log_data.c
+++ b/math/v_log_data.c
@@ -2,7 +2,7 @@
* Lookup table for double-precision log(x) vector function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "v_log.h"
diff --git a/math/v_logf.c b/math/v_logf.c
index 7373192..93a5375 100644
--- a/math/v_logf.c
+++ b/math/v_logf.c
@@ -2,7 +2,7 @@
* Single-precision vector log function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_math.h b/math/v_math.h
index f2cc467..3289916 100644
--- a/math/v_math.h
+++ b/math/v_math.h
@@ -1,8 +1,8 @@
/*
* Vector math abstractions.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _V_MATH_H
@@ -191,6 +191,11 @@ v_round_s32 (v_f32_t x)
{
return __builtin_lroundf (x); /* relies on -fno-math-errno. */
}
+static inline v_f32_t
+v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y)
+{
+ return p ? x : y;
+}
/* convert to type1 from type2. */
static inline v_f32_t
v_to_f32_s32 (v_s32_t x)
@@ -311,6 +316,11 @@ v_round_s64 (v_f64_t x)
{
return __builtin_lround (x); /* relies on -fno-math-errno. */
}
+static inline v_f64_t
+v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y)
+{
+ return p ? x : y;
+}
/* convert to type1 from type2. */
static inline v_f64_t
v_to_f64_s64 (v_s64_t x)
@@ -460,6 +470,11 @@ v_round_s32 (v_f32_t x)
{
return vcvtaq_s32_f32 (x);
}
+static inline v_f32_t
+v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y)
+{
+ return vbslq_f32 (p, x, y);
+}
/* convert to type1 from type2. */
static inline v_f32_t
v_to_f32_s32 (v_s32_t x)
@@ -584,6 +599,11 @@ v_round_s64 (v_f64_t x)
{
return vcvtaq_s64_f64 (x);
}
+static inline v_f64_t
+v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y)
+{
+ return vbslq_f64 (p, x, y);
+}
/* convert to type1 from type2. */
static inline v_f64_t
v_to_f64_s64 (v_s64_t x)
diff --git a/math/v_pow.c b/math/v_pow.c
index a209d57..05a83aa 100644
--- a/math/v_pow.c
+++ b/math/v_pow.c
@@ -2,7 +2,7 @@
* Double-precision vector pow function.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_powf.c b/math/v_powf.c
index fb80fa6..ad8ab8d 100644
--- a/math/v_powf.c
+++ b/math/v_powf.c
@@ -2,7 +2,7 @@
* Single-precision vector powf function.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
diff --git a/math/v_sin.c b/math/v_sin.c
index 2b9ed05..9dbb9de 100644
--- a/math/v_sin.c
+++ b/math/v_sin.c
@@ -1,8 +1,8 @@
/*
* Double-precision vector sin function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -34,9 +34,15 @@ static const double Poly[] = {
#define Pi2 v_f64 (0x1.1a62633145c06p-53)
#define Pi3 v_f64 (0x1.c1cd129024e09p-106)
#define Shift v_f64 (0x1.8p52)
-#define RangeVal v_f64 (0x1p23)
#define AbsMask v_u64 (0x7fffffffffffffff)
+#if WANT_SIMD_EXCEPT
+#define TinyBound 0x202 /* top12 (asuint64 (0x1p-509)). */
+#define Thresh 0x214 /* top12 (asuint64 (RangeVal)) - TinyBound. */
+#else
+#define RangeVal v_f64 (0x1p23)
+#endif
+
VPCS_ATTR
__attribute__ ((noinline)) static v_f64_t
specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
@@ -49,11 +55,22 @@ v_f64_t
V_NAME(sin) (v_f64_t x)
{
v_f64_t n, r, r2, y;
- v_u64_t sign, odd, cmp;
+ v_u64_t sign, odd, cmp, ir;
- r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
+ ir = v_as_u64_f64 (x) & AbsMask;
+ r = v_as_f64_u64 (ir);
sign = v_as_u64_f64 (x) & ~AbsMask;
- cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
+
+#if WANT_SIMD_EXCEPT
+ /* Detect |x| < 0x1p-509 or |x| >= RangeVal. If fenv exceptions are to be
+ triggered correctly, set any special lanes to 1 (which is neutral w.r.t.
+ fenv). These lanes will be fixed by specialcase later. */
+ cmp = v_cond_u64 ((ir >> 52) - TinyBound >= Thresh);
+ if (unlikely (v_any_u64 (cmp)))
+ r = v_sel_f64 (cmp, v_f64 (1), r);
+#else
+ cmp = v_cond_u64 (ir >= v_as_u64_f64 (RangeVal));
+#endif
/* n = rint(|x|/pi). */
n = v_fma_f64 (InvPi, r, Shift);
diff --git a/math/v_sinf.c b/math/v_sinf.c
index e66bfce..ce35dac 100644
--- a/math/v_sinf.c
+++ b/math/v_sinf.c
@@ -1,8 +1,8 @@
/*
* Single-precision vector sin function.
*
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
@@ -24,6 +24,7 @@ static const float Poly[] = {
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])
#define RangeVal v_f32 (0x1p20f)
+#define TinyBound v_f32 (0x1p-61f)
#define InvPi v_f32 (0x1.45f306p-2f)
#define Shift v_f32 (0x1.8p+23f)
#define AbsMask v_u32 (0x7fffffff)
@@ -41,11 +42,23 @@ v_f32_t
V_NAME(sinf) (v_f32_t x)
{
v_f32_t n, r, r2, y;
- v_u32_t sign, odd, cmp;
+ v_u32_t sign, odd, cmp, ir;
- r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
+ ir = v_as_u32_f32 (x) & AbsMask;
+ r = v_as_f32_u32 (ir);
sign = v_as_u32_f32 (x) & ~AbsMask;
- cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
+
+#if WANT_SIMD_EXCEPT
+ cmp = v_cond_u32 ((ir - v_as_u32_f32 (TinyBound)
+ >= v_as_u32_f32 (RangeVal) - v_as_u32_f32 (TinyBound)));
+ if (unlikely (v_any_u32 (cmp)))
+ /* If fenv exceptions are to be triggered correctly, set any special lanes
+ to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+ specialcase later. */
+ r = v_sel_f32 (cmp, v_f32 (1), r);
+#else
+ cmp = v_cond_u32 (ir >= v_as_u32_f32 (RangeVal));
+#endif
/* n = rint(|x|/pi) */
n = v_fma_f32 (InvPi, r, Shift);
diff --git a/math/vn_cos.c b/math/vn_cos.c
index b57a549..4b5b237 100644
--- a/math/vn_cos.c
+++ b/math/vn_cos.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_cos.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_cosf.c b/math/vn_cosf.c
index 6321d46..86dd26e 100644
--- a/math/vn_cosf.c
+++ b/math/vn_cosf.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_cosf.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_exp.c b/math/vn_exp.c
index 06e269d..0d85b17 100644
--- a/math/vn_exp.c
+++ b/math/vn_exp.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_exp.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_exp2f.c b/math/vn_exp2f.c
index db9707e..da3bb40 100644
--- a/math/vn_exp2f.c
+++ b/math/vn_exp2f.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_exp2f.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_exp2f_1u.c b/math/vn_exp2f_1u.c
index 17bd0ab..3e3a247 100644
--- a/math/vn_exp2f_1u.c
+++ b/math/vn_exp2f_1u.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_exp2f_1u.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_expf.c b/math/vn_expf.c
index 0652907..6e91a94 100644
--- a/math/vn_expf.c
+++ b/math/vn_expf.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_expf.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_expf_1u.c b/math/vn_expf_1u.c
index 3be7768..57ae6a3 100644
--- a/math/vn_expf_1u.c
+++ b/math/vn_expf_1u.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_expf_1u.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_log.c b/math/vn_log.c
index b58fe8f..902bff1 100644
--- a/math/vn_log.c
+++ b/math/vn_log.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_log.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_logf.c b/math/vn_logf.c
index cc5b8ae..07e4936 100644
--- a/math/vn_logf.c
+++ b/math/vn_logf.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_logf.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_pow.c b/math/vn_pow.c
index 2609501..1a980ff 100644
--- a/math/vn_pow.c
+++ b/math/vn_pow.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_pow.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_powf.c b/math/vn_powf.c
index 095d07e..a42ade3 100644
--- a/math/vn_powf.c
+++ b/math/vn_powf.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_powf.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_sin.c b/math/vn_sin.c
index 905c796..64b05c8 100644
--- a/math/vn_sin.c
+++ b/math/vn_sin.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_sin.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs
diff --git a/math/vn_sinf.c b/math/vn_sinf.c
index 1214e1a..6e880c6 100644
--- a/math/vn_sinf.c
+++ b/math/vn_sinf.c
@@ -2,7 +2,7 @@
* AdvSIMD vector PCS variant of __v_sinf.
*
* Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "mathlib.h"
#ifdef __vpcs