aboutsummaryrefslogtreecommitdiff
path: root/pl
diff options
context:
space:
mode:
Diffstat (limited to 'pl')
-rw-r--r--pl/Dir.mk21
-rw-r--r--pl/README.contributors23
-rw-r--r--pl/math/Dir.mk229
-rw-r--r--pl/math/acosh_3u.c66
-rw-r--r--pl/math/acoshf_2u8.c63
-rw-r--r--pl/math/asinh_2u5.c86
-rw-r--r--pl/math/asinh_data.c22
-rw-r--r--pl/math/asinhf_3u5.c78
-rw-r--r--pl/math/asinhf_data.c15
-rw-r--r--pl/math/atan2_2u5.c159
-rw-r--r--pl/math/atan2f_3u.c167
-rw-r--r--pl/math/atan_2u5.c73
-rw-r--r--pl/math/atan_common.h49
-rw-r--r--pl/math/atan_data.c20
-rw-r--r--pl/math/atanf_2u9.c76
-rw-r--r--pl/math/atanf_common.h51
-rw-r--r--pl/math/atanf_data.c15
-rw-r--r--pl/math/atanh_3u.c86
-rw-r--r--pl/math/atanhf_3u1.c88
-rw-r--r--pl/math/cbrt_2u.c70
-rw-r--r--pl/math/cbrt_data.c15
-rw-r--r--pl/math/cbrtf_1u5.c67
-rw-r--r--pl/math/cbrtf_data.c15
-rw-r--r--pl/math/cosh_2u.c66
-rw-r--r--pl/math/coshf_1u9.c71
-rw-r--r--pl/math/erfc_4u5.c155
-rw-r--r--pl/math/erfc_data.c145
-rw-r--r--pl/math/erfcf.h38
-rw-r--r--pl/math/erfcf_2u.c133
-rw-r--r--pl/math/erfcf_data.c57
-rw-r--r--pl/math/erff_1u5.c108
-rw-r--r--pl/math/erff_data.c16
-rw-r--r--pl/math/estrin.h16
-rw-r--r--pl/math/estrin_wrap.h48
-rw-r--r--pl/math/estrinf.h14
-rw-r--r--pl/math/exp.c163
-rw-r--r--pl/math/exp_data.c1120
-rw-r--r--pl/math/expf.c76
-rw-r--r--pl/math/expf_data.c31
-rw-r--r--pl/math/expm1_2u5.c86
-rw-r--r--pl/math/expm1_data.c21
-rw-r--r--pl/math/expm1f_1u6.c80
-rw-r--r--pl/math/expm1f_data.c12
-rw-r--r--pl/math/horner.h14
-rw-r--r--pl/math/horner_wrap.h34
-rw-r--r--pl/math/hornerf.h14
-rw-r--r--pl/math/include/mathlib.h244
-rw-r--r--pl/math/include/pl_test.h26
-rw-r--r--pl/math/log.c161
-rw-r--r--pl/math/log10_2u.c150
-rw-r--r--pl/math/log10_data.c337
-rw-r--r--pl/math/log10f.c97
-rw-r--r--pl/math/log1p_2u.c136
-rw-r--r--pl/math/log1p_data.c19
-rw-r--r--pl/math/log1pf_2u1.c165
-rw-r--r--pl/math/log1pf_data.c14
-rw-r--r--pl/math/log_data.c511
-rw-r--r--pl/math/logf.c75
-rw-r--r--pl/math/logf_data.c36
-rw-r--r--pl/math/math_config.h572
-rw-r--r--pl/math/math_err.c78
-rw-r--r--pl/math/math_errf.c78
-rw-r--r--pl/math/pairwise_horner.h14
-rw-r--r--pl/math/pairwise_horner_wrap.h48
-rw-r--r--pl/math/pairwise_hornerf.h14
-rw-r--r--pl/math/pl_sig.h43
-rw-r--r--pl/math/s_acosh_3u5.c6
-rw-r--r--pl/math/s_acoshf_3u1.c6
-rw-r--r--pl/math/s_asinh_3u5.c6
-rw-r--r--pl/math/s_asinhf_2u7.c6
-rw-r--r--pl/math/s_atan2_3u.c6
-rw-r--r--pl/math/s_atan2f_3u.c6
-rw-r--r--pl/math/s_atan_2u5.c6
-rw-r--r--pl/math/s_atanf_3u.c6
-rw-r--r--pl/math/s_atanh_3u5.c6
-rw-r--r--pl/math/s_atanhf_3u1.c6
-rw-r--r--pl/math/s_cbrt_2u.c6
-rw-r--r--pl/math/s_cbrtf_1u5.c6
-rw-r--r--pl/math/s_cosh_2u.c6
-rw-r--r--pl/math/s_coshf_2u4.c6
-rw-r--r--pl/math/s_erf_2u.c6
-rw-r--r--pl/math/s_erfc_4u.c6
-rw-r--r--pl/math/s_erfcf_1u.c6
-rw-r--r--pl/math/s_erff_1u5.c6
-rw-r--r--pl/math/s_exp_tail.c6
-rw-r--r--pl/math/s_expf.c6
-rw-r--r--pl/math/s_expm1_2u5.c6
-rw-r--r--pl/math/s_expm1f_1u6.c6
-rw-r--r--pl/math/s_log10_2u5.c6
-rw-r--r--pl/math/s_log10f_3u5.c6
-rw-r--r--pl/math/s_log1p_2u5.c6
-rw-r--r--pl/math/s_log1pf_2u1.c6
-rw-r--r--pl/math/s_log2_3u.c6
-rw-r--r--pl/math/s_log2f_2u5.c6
-rw-r--r--pl/math/s_sinh_3u.c6
-rw-r--r--pl/math/s_sinhf_2u3.c6
-rw-r--r--pl/math/s_tan_3u5.c6
-rw-r--r--pl/math/s_tanf_3u5.c6
-rw-r--r--pl/math/s_tanh_3u.c6
-rw-r--r--pl/math/s_tanhf_2u6.c6
-rw-r--r--pl/math/sinh_3u.c66
-rw-r--r--pl/math/sinhf_2u3.c76
-rw-r--r--pl/math/sv_atan2_2u5.c93
-rw-r--r--pl/math/sv_atan2f_3u.c94
-rw-r--r--pl/math/sv_atan_2u5.c62
-rw-r--r--pl/math/sv_atan_common.h61
-rw-r--r--pl/math/sv_atanf_2u9.c59
-rw-r--r--pl/math/sv_atanf_common.h47
-rw-r--r--pl/math/sv_cos_2u5.c84
-rw-r--r--pl/math/sv_cosf_2u1.c82
-rw-r--r--pl/math/sv_erf_3u.c103
-rw-r--r--pl/math/sv_erfc_4u.c146
-rw-r--r--pl/math/sv_erff_1u3.c104
-rw-r--r--pl/math/sv_exp_tail.h79
-rw-r--r--pl/math/sv_expf_2u.c156
-rw-r--r--pl/math/sv_expf_data.c12
-rw-r--r--pl/math/sv_log10_2u5.c89
-rw-r--r--pl/math/sv_log10f_3u5.c88
-rw-r--r--pl/math/sv_log2_3u.c85
-rw-r--r--pl/math/sv_log2f_2u5.c79
-rw-r--r--pl/math/sv_log_2u5.c85
-rw-r--r--pl/math/sv_log_data.c146
-rw-r--r--pl/math/sv_logf_3u4.c77
-rw-r--r--pl/math/sv_logf_data.c12
-rw-r--r--pl/math/sv_math.h245
-rw-r--r--pl/math/sv_powi.c53
-rw-r--r--pl/math/sv_powif.c54
-rw-r--r--pl/math/sv_sin_3u.c89
-rw-r--r--pl/math/sv_sinf_1u9.c84
-rw-r--r--pl/math/sv_sinf_poly_data.c19
-rw-r--r--pl/math/sv_tanf_3u5.c112
-rw-r--r--pl/math/tanf_3u3.c202
-rw-r--r--pl/math/tanf_data.c45
-rw-r--r--pl/math/tanh_3u.c82
-rw-r--r--pl/math/tanhf_2u6.c91
-rw-r--r--pl/math/test/mathbench_funcs.h86
-rw-r--r--pl/math/test/mathbench_wrappers.h133
-rw-r--r--pl/math/test/pl_test.h33
-rwxr-xr-xpl/math/test/runulp.sh78
-rw-r--r--pl/math/test/testcases/directed/acosh.tst19
-rw-r--r--pl/math/test/testcases/directed/acoshf.tst19
-rw-r--r--pl/math/test/testcases/directed/asinh.tst18
-rw-r--r--pl/math/test/testcases/directed/asinhf.tst18
-rw-r--r--pl/math/test/testcases/directed/atan.tst22
-rw-r--r--pl/math/test/testcases/directed/atan2.tst110
-rw-r--r--pl/math/test/testcases/directed/atan2f.tst121
-rw-r--r--pl/math/test/testcases/directed/atanf.tst22
-rw-r--r--pl/math/test/testcases/directed/atanh.tst22
-rw-r--r--pl/math/test/testcases/directed/atanhf.tst23
-rw-r--r--pl/math/test/testcases/directed/cbrtf.tst29
-rw-r--r--pl/math/test/testcases/directed/cosh.tst15
-rw-r--r--pl/math/test/testcases/directed/coshf.tst15
-rw-r--r--pl/math/test/testcases/directed/erfc.tst23
-rw-r--r--pl/math/test/testcases/directed/erfcf.tst14
-rw-r--r--pl/math/test/testcases/directed/erff.tst17
-rw-r--r--pl/math/test/testcases/directed/expm1.tst21
-rw-r--r--pl/math/test/testcases/directed/expm1f.tst57
-rw-r--r--pl/math/test/testcases/directed/log10.tst16
-rw-r--r--pl/math/test/testcases/directed/log10f.tst69
-rw-r--r--pl/math/test/testcases/directed/log1p.tst22
-rw-r--r--pl/math/test/testcases/directed/log1pf.tst130
-rw-r--r--pl/math/test/testcases/directed/log2.tst21
-rw-r--r--pl/math/test/testcases/directed/log2f.tst27
-rw-r--r--pl/math/test/testcases/directed/sinh.tst21
-rw-r--r--pl/math/test/testcases/directed/sinhf.tst21
-rw-r--r--pl/math/test/testcases/directed/tanf.tst25
-rw-r--r--pl/math/test/testcases/directed/tanh.tst18
-rw-r--r--pl/math/test/testcases/directed/tanhf.tst20
-rw-r--r--pl/math/test/testcases/random/double.tst6
-rw-r--r--pl/math/test/testcases/random/float.tst8
-rw-r--r--pl/math/test/ulp_funcs.h66
-rw-r--r--pl/math/test/ulp_wrappers.h148
-rw-r--r--pl/math/tools/asinh.sollya28
-rw-r--r--pl/math/tools/asinhf.sollya29
-rw-r--r--pl/math/tools/atan.sollya23
-rw-r--r--pl/math/tools/atanf.sollya20
-rw-r--r--pl/math/tools/cbrt.sollya20
-rw-r--r--pl/math/tools/cbrtf.sollya20
-rw-r--r--pl/math/tools/erfc.sollya23
-rw-r--r--pl/math/tools/erfcf.sollya31
-rw-r--r--pl/math/tools/expm1.sollya21
-rw-r--r--pl/math/tools/expm1f.sollya21
-rw-r--r--pl/math/tools/log10.sollya44
-rw-r--r--pl/math/tools/log10f.sollya37
-rw-r--r--pl/math/tools/log1p.sollya30
-rw-r--r--pl/math/tools/log1pf.sollya21
-rw-r--r--pl/math/tools/tan.sollya20
-rw-r--r--pl/math/tools/tanf.sollya78
-rw-r--r--pl/math/tools/v_erf.sollya20
-rw-r--r--pl/math/tools/v_erfc.sollya46
-rw-r--r--pl/math/tools/v_log10.sollya38
-rw-r--r--pl/math/tools/v_log10f.sollya45
-rw-r--r--pl/math/tools/v_log2f.sollya38
-rw-r--r--pl/math/v_acosh_3u5.c51
-rw-r--r--pl/math/v_acoshf_3u1.c68
-rw-r--r--pl/math/v_asinh_3u5.c175
-rw-r--r--pl/math/v_asinhf_2u7.c70
-rw-r--r--pl/math/v_atan2_3u.c90
-rw-r--r--pl/math/v_atan2f_3u.c89
-rw-r--r--pl/math/v_atan_2u5.c74
-rw-r--r--pl/math/v_atanf_3u.c83
-rw-r--r--pl/math/v_atanh_3u5.c61
-rw-r--r--pl/math/v_atanhf_3u1.c62
-rw-r--r--pl/math/v_cbrt_2u.c98
-rw-r--r--pl/math/v_cbrtf_1u5.c96
-rw-r--r--pl/math/v_cosh_2u.c96
-rw-r--r--pl/math/v_coshf_2u4.c74
-rw-r--r--pl/math/v_erf_2u.c116
-rw-r--r--pl/math/v_erf_data.c119
-rw-r--r--pl/math/v_erfc_4u.c168
-rw-r--r--pl/math/v_erfc_data.c96
-rw-r--r--pl/math/v_erfcf_1u.c183
-rw-r--r--pl/math/v_erff_1u5.c116
-rw-r--r--pl/math/v_erff_data.c18
-rw-r--r--pl/math/v_exp_tail.c75
-rw-r--r--pl/math/v_exp_tail.h21
-rw-r--r--pl/math/v_exp_tail_data.c97
-rw-r--r--pl/math/v_expf.c83
-rw-r--r--pl/math/v_expm1_2u5.c113
-rw-r--r--pl/math/v_expm1f_1u6.c94
-rw-r--r--pl/math/v_expm1f_inline.h49
-rw-r--r--pl/math/v_log10_2u5.c110
-rw-r--r--pl/math/v_log10_data.c167
-rw-r--r--pl/math/v_log10f_3u5.c82
-rw-r--r--pl/math/v_log10f_data.c13
-rw-r--r--pl/math/v_log1p_2u5.c120
-rw-r--r--pl/math/v_log1p_inline.h77
-rw-r--r--pl/math/v_log1pf_2u1.c160
-rw-r--r--pl/math/v_log1pf_inline.h55
-rw-r--r--pl/math/v_log2_3u.c100
-rw-r--r--pl/math/v_log2_data.c155
-rw-r--r--pl/math/v_log2f_2u5.c68
-rw-r--r--pl/math/v_log2f_data.c15
-rw-r--r--pl/math/v_math.h855
-rw-r--r--pl/math/v_sinh_3u.c94
-rw-r--r--pl/math/v_sinhf_2u3.c69
-rw-r--r--pl/math/v_tan_3u5.c102
-rw-r--r--pl/math/v_tan_data.c15
-rw-r--r--pl/math/v_tanf_3u5.c131
-rw-r--r--pl/math/v_tanh_3u.c94
-rw-r--r--pl/math/v_tanhf_2u6.c69
-rw-r--r--pl/math/vn_acosh_3u5.c12
-rw-r--r--pl/math/vn_acoshf_3u1.c12
-rw-r--r--pl/math/vn_asinh_3u5.c12
-rw-r--r--pl/math/vn_asinhf_2u7.c12
-rw-r--r--pl/math/vn_atan2_3u.c12
-rw-r--r--pl/math/vn_atan2f_3u.c12
-rw-r--r--pl/math/vn_atan_2u5.c12
-rw-r--r--pl/math/vn_atanf_3u.c12
-rw-r--r--pl/math/vn_atanh_3u5.c12
-rw-r--r--pl/math/vn_atanhf_3u1.c12
-rw-r--r--pl/math/vn_cbrt_2u.c12
-rw-r--r--pl/math/vn_cbrtf_1u5.c12
-rw-r--r--pl/math/vn_cosh_2u.c12
-rw-r--r--pl/math/vn_coshf_2u4.c12
-rw-r--r--pl/math/vn_erf_2u.c12
-rw-r--r--pl/math/vn_erfc_4u.c12
-rw-r--r--pl/math/vn_erfcf_1u.c12
-rw-r--r--pl/math/vn_erff_1u5.c12
-rw-r--r--pl/math/vn_exp_tail.c11
-rw-r--r--pl/math/vn_expf.c12
-rw-r--r--pl/math/vn_expm1_2u5.c12
-rw-r--r--pl/math/vn_expm1f_1u6.c12
-rw-r--r--pl/math/vn_log10_2u5.c12
-rw-r--r--pl/math/vn_log10f_3u5.c12
-rw-r--r--pl/math/vn_log1p_2u5.c12
-rw-r--r--pl/math/vn_log1pf_2u1.c12
-rw-r--r--pl/math/vn_log2_3u.c12
-rw-r--r--pl/math/vn_log2f_2u5.c12
-rw-r--r--pl/math/vn_sinh_3u.c12
-rw-r--r--pl/math/vn_sinhf_2u3.c12
-rw-r--r--pl/math/vn_tan_3u5.c12
-rw-r--r--pl/math/vn_tanf_3u5.c12
-rw-r--r--pl/math/vn_tanh_3u.c12
-rw-r--r--pl/math/vn_tanhf_2u6.c12
275 files changed, 17834 insertions, 0 deletions
diff --git a/pl/Dir.mk b/pl/Dir.mk
new file mode 100644
index 0000000..2d00779
--- /dev/null
+++ b/pl/Dir.mk
@@ -0,0 +1,21 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+# These targets are defined if we prescribe pl in SUBS.
+# It requires PLSUBS to be set.
+
+$(foreach sub,$(PLSUBS),$(eval include $(srcdir)/pl/$(sub)/Dir.mk))
+
+pl-files := $($(PLSUBS:%=pl/%-files))
+
+all-pl: $(PLSUBS:%=all-pl/%)
+
+check-pl: $(PLSUBS:%=check-pl/%)
+
+install-pl: $(PLSUBS:%=install-pl/%)
+
+clean-pl: $(PLSUBS:%=clean-pl/%)
+
+.PHONY: all-pl check-pl install-pl clean-pl
diff --git a/pl/README.contributors b/pl/README.contributors
new file mode 100644
index 0000000..3af9b1f
--- /dev/null
+++ b/pl/README.contributors
@@ -0,0 +1,23 @@
+Code in this sub-directory should follow the GNU Coding Standard, but it is
+not expected to be upstreamed into glibc without modification, so
+glibc-specific conventions need not be followed.
+
+The requirements for portable code apply to non-portable code with the
+following differences:
+
+
+1. Worst-case ULP error should be encoded in filenames (e.g. sin_3u5.c). There
+ are no specific restrictions on acceptable ULP error, but if functions
+ provide significantly less accuracy than portable equivalents then a clear
+ justification for inclusion should be stated in comments at the top of the
+ source file. Error bounds of the approximation should be clearly documented
+ in comments.
+
+2. Functions are assumed to support round-to-nearest mode by default, unless
+ stated; other rounding modes are not required to be provided.
+
+3. Handling of special cases may be relaxed for vector functions. Checking
+ whether each vector lane contains special values such as NaN, Inf or
+ denormal numbers can prove too costly for vector functions. This is often
+ not required since vector functions are typically used along with aggressive
+ compiler optimization flags.
diff --git a/pl/math/Dir.mk b/pl/math/Dir.mk
new file mode 100644
index 0000000..be65344
--- /dev/null
+++ b/pl/math/Dir.mk
@@ -0,0 +1,229 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2019-2023, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+PLM := $(srcdir)/pl/math
+AOR := $(srcdir)/math
+B := build/pl/math
+
+math-lib-srcs := $(wildcard $(PLM)/*.[cS])
+math-test-srcs := \
+ $(AOR)/test/mathtest.c \
+ $(AOR)/test/mathbench.c \
+ $(AOR)/test/ulp.c \
+
+math-test-host-srcs := $(wildcard $(AOR)/test/rtest/*.[cS])
+
+math-includes := $(patsubst $(PLM)/%,build/pl/%,$(wildcard $(PLM)/include/*.h))
+math-test-includes := $(patsubst $(PLM)/%,build/pl/include/%,$(wildcard $(PLM)/test/*.h))
+
+math-libs := \
+ build/pl/lib/libmathlib.so \
+ build/pl/lib/libmathlib.a \
+
+math-tools := \
+ build/pl/bin/mathtest \
+ build/pl/bin/mathbench \
+ build/pl/bin/mathbench_libc \
+ build/pl/bin/runulp.sh \
+ build/pl/bin/ulp \
+
+math-host-tools := \
+ build/pl/bin/rtest \
+
+math-lib-objs := $(patsubst $(PLM)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
+math-test-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-srcs)))
+math-host-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
+math-target-objs := $(math-lib-objs) $(math-test-objs)
+math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)
+
+pl/math-files := \
+ $(math-objs) \
+ $(math-libs) \
+ $(math-tools) \
+ $(math-host-tools) \
+ $(math-includes) \
+ $(math-test-includes) \
+
+all-pl/math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes)
+
+$(math-objs): $(math-includes) $(math-test-includes)
+$(math-objs): CFLAGS_PL += $(math-cflags)
+$(B)/test/mathtest.o: CFLAGS_PL += -fmath-errno
+$(math-host-objs): CC = $(HOST_CC)
+$(math-host-objs): CFLAGS_PL = $(HOST_CFLAGS)
+
+build/pl/include/test/ulp_funcs_gen.h: $(math-lib-srcs)
+ # Replace PL_SIG
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f)" -P > $@
+
+build/pl/include/test/mathbench_funcs_gen.h: $(math-lib-srcs)
+ # Replace PL_SIG macros with mathbench func entries
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f, ##__VA_ARGS__)" -P > $@
+
+build/pl/include/test/ulp_wrappers_gen.h: $(math-lib-srcs)
+ # Replace PL_SIG macros with ULP wrapper declarations
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=Z##v##N##t##a##_WRAP(f)" -P > $@
+
+$(B)/test/ulp.o: $(AOR)/test/ulp.h build/pl/include/test/ulp_funcs_gen.h build/pl/include/test/ulp_wrappers_gen.h
+$(B)/test/ulp.o: CFLAGS_PL += -I build/pl/include/test
+
+$(B)/test/mathbench.o: build/pl/include/test/mathbench_funcs_gen.h
+$(B)/test/mathbench.o: CFLAGS_PL += -I build/pl/include/test
+
+build/pl/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -shared -o $@ $^
+
+build/pl/lib/libmathlib.a: $(math-lib-objs)
+ rm -f $@
+ $(AR) rc $@ $^
+ $(RANLIB) $@
+
+$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
+$(math-tools): LDLIBS += $(math-ldlibs) -lm
+
+# Some targets to build pl/math/test from math/test sources
+build/pl/math/test/%.o: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.o: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+# Some targets to build pl/ sources using appropriate flags
+build/pl/%.o: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.o: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/bin/rtest: $(math-host-objs)
+ $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
+
+build/pl/bin/mathtest: $(B)/test/mathtest.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/bin/mathbench: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+# This is not ideal, but allows custom symbols in mathbench to get resolved.
+build/pl/bin/mathbench_libc: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/pl/lib/libmathlib.a -lm
+
+build/pl/bin/ulp: $(B)/test/ulp.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/include/%.h: $(PLM)/include/%.h
+ cp $< $@
+
+build/pl/include/test/%.h: $(PLM)/test/%.h
+ cp $< $@
+
+build/pl/bin/%.sh: $(PLM)/test/%.sh
+ cp $< $@
+
+pl-math-tests := $(wildcard $(PLM)/test/testcases/directed/*.tst)
+pl-math-rtests := $(wildcard $(PLM)/test/testcases/random/*.tst)
+
+check-pl/math-test: $(math-tools)
+ cat $(pl-math-tests) | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+check-pl/math-rtest: $(math-host-tools) $(math-tools)
+ cat $(pl-math-rtests) | build/pl/bin/rtest | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+ulp-input-dir=$(B)/test/inputs
+
+math-lib-lims = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.ulp,$(basename $(math-lib-srcs)))
+math-lib-aliases = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.alias,$(basename $(math-lib-srcs)))
+math-lib-fenvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.fenv,$(basename $(math-lib-srcs)))
+math-lib-itvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.itv,$(basename $(math-lib-srcs)))
+
+ulp-inputs = $(math-lib-lims) $(math-lib-aliases) $(math-lib-fenvs) $(math-lib-itvs)
+
+$(ulp-inputs): CFLAGS_PL += -I$(PLM) -I$(PLM)/include $(math-cflags)
+
+$(ulp-input-dir)/%.ulp: $(PLM)/%.c
+ mkdir -p $(@D)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_ULP [^ ]* [^ ]*" || true; } > $@
+
+$(ulp-input-dir)/%.alias: $(PLM)/%.c
+ mkdir -p $(@D)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep "PL_TEST_ALIAS" || true; } | sed "s/_x / /g"> $@
+
+$(ulp-input-dir)/%.fenv: $(PLM)/%.c
+ mkdir -p $(@D)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_EXPECT_FENV_ENABLED [^ ]*" || true; } > $@
+
+$(ulp-input-dir)/%.itv: $(PLM)/%.c
+ mkdir -p $(dir $@)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep "PL_TEST_INTERVAL " || true; } | sed "s/ PL_TEST_INTERVAL/\nPL_TEST_INTERVAL/g" > $@
+
+ulp-lims := $(ulp-input-dir)/limits
+$(ulp-lims): $(math-lib-lims)
+ cat $^ | sed "s/PL_TEST_ULP //g;s/^ *//g" > $@
+
+ulp-aliases := $(ulp-input-dir)/aliases
+$(ulp-aliases): $(math-lib-aliases)
+ cat $^ | sed "s/PL_TEST_ALIAS //g;s/^ *//g" > $@
+
+fenv-exps := $(ulp-input-dir)/fenv
+$(fenv-exps): $(math-lib-fenvs)
+ cat $^ | sed "s/PL_TEST_EXPECT_FENV_ENABLED //g;s/^ *//g" > $@
+
+ulp-itvs-noalias := $(ulp-input-dir)/itvs_noalias
+$(ulp-itvs-noalias): $(math-lib-itvs)
+ cat $^ > $@
+
+rename-aliases := $(ulp-input-dir)/rename_alias.sed
+$(rename-aliases): $(ulp-aliases)
+ # Build sed script for replacing aliases from generated alias file
+ cat $< | awk '{ print "s/ " $$1 " / " $$2 " /g" }' > $@
+
+ulp-itvs-alias := $(ulp-input-dir)/itvs_alias
+$(ulp-itvs-alias): $(ulp-itvs-noalias) $(rename-aliases)
+ cat $< | sed -f $(rename-aliases) > $@
+
+ulp-itvs := $(ulp-input-dir)/intervals
+$(ulp-itvs): $(ulp-itvs-alias) $(ulp-itvs-noalias)
+ cat $^ | sort -u | sed "s/PL_TEST_INTERVAL //g" > $@
+
+check-pl/math-ulp: $(math-tools) $(ulp-lims) $(ulp-aliases) $(fenv-exps) $(ulp-itvs)
+ WANT_SVE_MATH=$(WANT_SVE_MATH) \
+ ULPFLAGS="$(math-ulpflags)" \
+ LIMITS=../../../$(ulp-lims) \
+ ALIASES=../../../$(ulp-aliases) \
+ INTERVALS=../../../$(ulp-itvs) \
+ FENV=../../../$(fenv-exps) \
+ build/pl/bin/runulp.sh $(EMULATOR)
+
+check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp
+
+$(DESTDIR)$(libdir)/pl/%.so: build/pl/lib/%.so
+ $(INSTALL) -D $< $@
+
+$(DESTDIR)$(libdir)/pl/%: build/pl/lib/%
+ $(INSTALL) -m 644 -D $< $@
+
+$(DESTDIR)$(includedir)/pl/%: build/pl/include/%
+ $(INSTALL) -m 644 -D $< $@
+
+install-pl/math: \
+ $(math-libs:build/pl/lib/%=$(DESTDIR)$(libdir)/pl/%) \
+ $(math-includes:build/pl/include/%=$(DESTDIR)$(includedir)/pl/%)
+
+clean-pl/math:
+ rm -f $(pl/math-files)
+
+.PHONY: all-pl/math check-pl/math-test check-pl/math-rtest check-pl/math-ulp check-pl/math install-pl/math clean-pl/math
diff --git a/pl/math/acosh_3u.c b/pl/math/acosh_3u.c
new file mode 100644
index 0000000..4e2cb67
--- /dev/null
+++ b/pl/math/acosh_3u.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision acosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2 (0x1.62e42fefa39efp-1)
+#define MinusZero (0x8000000000000000)
+#define SquareLim (0x5fe0000000000000) /* asuint64(0x1.0p511). */
+#define Two (0x4000000000000000) /* asuint64(2.0). */
+
+double
+optr_aor_log_f64 (double);
+
+double
+log1p (double);
+
+/* acosh approximation using a variety of approaches on different intervals:
+
+ acosh(x) = ln(x + sqrt(x * x - 1)).
+
+ x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
+ close enough to x that we can calculate the result by ln(2x) == ln(x) +
+ ln(2). The greatest observed error in this region is 0.98 ULP:
+ acosh(0x1.1b9bf42923d1dp+853) got 0x1.28066a11a7c7fp+9
+ want 0x1.28066a11a7c8p+9.
+
+ x >= 2: Calculate the result directly using definition of acosh(x). Greatest
+ observed error in this region is 1.33 ULP:
+ acosh(0x1.1e45d14bfcfa2p+1) got 0x1.71a06f50c34b5p+0
+ want 0x1.71a06f50c34b6p+0.
+
+ 0 <= x < 2: Calculate the result using log1p. For x < 1, acosh(x) is
+ undefined. For 1 <= x < 2, the largest observed error is 2.69 ULP:
+ acosh(0x1.073528248093p+0) got 0x1.e4d9bd20684f3p-3
+ want 0x1.e4d9bd20684f6p-3. */
+double
+acosh (double x)
+{
+ uint64_t ix = asuint64 (x); /* Bits of x; compared as unsigned below. */
+
+ if (unlikely (ix >= MinusZero)) /* Sign bit set, including -0. */
+ return __math_invalid (x);
+
+ if (unlikely (ix >= SquareLim)) /* x * x would overflow: ln(x) + ln(2). */
+ return optr_aor_log_f64 (x) + Ln2;
+
+ if (ix >= Two) /* 2 <= x < 2^511: use the definition directly. */
+ return optr_aor_log_f64 (x + sqrt (x * x - 1));
+
+ double xm1 = x - 1; /* x * x - 1 == 2 * xm1 + xm1 * xm1. */
+ return log1p (xm1 + sqrt (2 * xm1 + xm1 * xm1));
+}
+
+PL_SIG (S, D, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (acosh, 2.19)
+PL_TEST_INTERVAL (acosh, 0, 1, 10000)
+PL_TEST_INTERVAL (acosh, 1, 2, 100000)
+PL_TEST_INTERVAL (acosh, 2, 0x1p511, 100000)
+PL_TEST_INTERVAL (acosh, 0x1p511, inf, 100000)
+PL_TEST_INTERVAL (acosh, -0, -inf, 10000)
diff --git a/pl/math/acoshf_2u8.c b/pl/math/acoshf_2u8.c
new file mode 100644
index 0000000..c9cded7
--- /dev/null
+++ b/pl/math/acoshf_2u8.c
@@ -0,0 +1,63 @@
+/*
+ * Single-precision acosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2 (0x1.62e4p-1f) /* Approximates ln(2). */
+#define MinusZero 0x80000000 /* asuint(-0.0f). */
+#define SquareLim 0x5f800000 /* asuint(0x1p64). */
+#define Two 0x40000000 /* asuint(2.0f). */
+
+/* Single-precision log from math/. */
+float
+optr_aor_log_f32 (float);
+
+/* Single-precision log(1+x) from pl/math. */
+float
+log1pf (float);
+
+/* acoshf approximation using a variety of approaches on different intervals:
+
+ x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
+ close enough to x that we can calculate the result by ln(2x) == ln(x) +
+ ln(2). The greatest error in the region is 0.94 ULP:
+ acoshf(0x1.15f706p+92) got 0x1.022e14p+6 want 0x1.022e16p+6.
+
+ x > 2: Calculate the result directly using definition of acosh(x) = ln(x +
+ sqrt(x*x - 1)). Greatest error in this region is 1.30 ULP:
+ acoshf(0x1.249d8p+1) got 0x1.77e1aep+0 want 0x1.77e1bp+0.
+
+ 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is
+ undefined. For 1 <= x <= 2, the greatest error is 2.78 ULP:
+ acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 want 0x1.ef9ea2p-3. */
+float
+acoshf (float x)
+{
+ uint32_t ix = asuint (x); /* Bits of x; compared as unsigned below. */
+
+ if (unlikely (ix >= MinusZero)) /* Sign bit set, including -0. */
+ return __math_invalidf (x);
+
+ if (unlikely (ix >= SquareLim)) /* x >= 2^64: ln(x) + ln(2). */
+ return optr_aor_log_f32 (x) + Ln2;
+
+ if (ix > Two) /* 2 < x < 2^64: use the definition directly. */
+ return optr_aor_log_f32 (x + sqrtf (x * x - 1));
+
+ float xm1 = x - 1; /* x * x - 1 == 2 * xm1 + xm1 * xm1. */
+ return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1));
+}
+
+PL_SIG (S, F, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (acoshf, 2.30)
+PL_TEST_INTERVAL (acoshf, 0, 1, 100)
+PL_TEST_INTERVAL (acoshf, 1, 2, 10000)
+PL_TEST_INTERVAL (acoshf, 2, 0x1p64, 100000)
+PL_TEST_INTERVAL (acoshf, 0x1p64, inf, 100000)
+PL_TEST_INTERVAL (acoshf, -0, -inf, 10000)
diff --git a/pl/math/asinh_2u5.c b/pl/math/asinh_2u5.c
new file mode 100644
index 0000000..f167955
--- /dev/null
+++ b/pl/math/asinh_2u5.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision asinh(x) function
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "estrin.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define ExpM26 0x3e50000000000000 /* asuint64(0x1.0p-26). */
+#define One 0x3ff0000000000000 /* asuint64(1.0). */
+#define Exp511 0x5fe0000000000000 /* asuint64(0x1.0p511). */
+#define Ln2 0x1.62e42fefa39efp-1
+
+double
+optr_aor_log_f64 (double);
+
+/* Scalar double-precision asinh implementation. This routine uses different
+ approaches on different intervals:
+
+ |x| < 2^-26: Return x. Function is exact in this region.
+
+ |x| < 1: Use custom order-17 polynomial. This is least accurate close to 1.
+ The largest observed error in this region is 1.47 ULPs:
+ asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1.
+
+ |x| < 2^511: Upper bound of this region is close to sqrt(DBL_MAX). Calculate
+ the result directly using the definition asinh(x) = ln(x + sqrt(x*x + 1)).
+ The largest observed error in this region is 2.03 ULPs:
+ asinh(-0x1.00094e0f39574p+0) got -0x1.c3508eb6a681ep-1
+ want -0x1.c3508eb6a682p-1.
+
+ |x| >= 2^511: We cannot square x without overflow at a low
+ cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+ even double x without overflow, so calculate this as ln(x) +
+ ln(2). The largest observed error in this region is 0.98 ULPs at many
+ values, for instance:
+ asinh(0x1.5255a4cf10319p+975) got 0x1.52652f4cb26cbp+9
+ want 0x1.52652f4cb26ccp+9. */
+double
+asinh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask; /* Bits of |x|. */
+ double ax = asdouble (ia); /* |x|. */
+ uint64_t sign = ix & ~AbsMask; /* Sign bit of x, OR-ed back into results. */
+
+ if (ia < ExpM26)
+ {
+ return x;
+ }
+
+ if (ia < One)
+ {
+ double x2 = x * x;
+ double z2 = x2 * x2;
+ double z4 = z2 * z2;
+ double z8 = z4 * z4;
+#define C(i) __asinh_data.poly[i]
+ double p = ESTRIN_17 (x2, z2, z4, z8, z8 * z8, C);
+ double y = fma (p, x2 * ax, ax); /* |x| + |x|^3 * p. */
+ return asdouble (asuint64 (y) | sign);
+ }
+
+ if (unlikely (ia >= Exp511)) /* ax * ax would overflow: ln(ax) + ln(2). */
+ {
+ return asdouble (asuint64 (optr_aor_log_f64 (ax) + Ln2) | sign);
+ }
+
+ return asdouble (asuint64 (optr_aor_log_f64 (ax + sqrt (ax * ax + 1)))
+ | sign);
+}
+
+PL_SIG (S, D, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (asinh, 1.54)
+PL_TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000)
+PL_TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000)
+PL_TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000)
+PL_TEST_INTERVAL (asinh, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (asinh, -1.0, -100.0, 10000)
+PL_TEST_INTERVAL (asinh, 100.0, inf, 50000)
+PL_TEST_INTERVAL (asinh, -100.0, -inf, 10000)
diff --git a/pl/math/asinh_data.c b/pl/math/asinh_data.c
new file mode 100644
index 0000000..073b197
--- /dev/null
+++ b/pl/math/asinh_data.c
@@ -0,0 +1,22 @@
+/*
+ * Double-precision polynomial coefficients for scalar asinh(x)
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* asinh(x) is odd, and the first term of the Taylor expansion is x, so we can
+ approximate the function by x + x^3 * P(x^2), where P(z) has the form:
+ C0 + C1 * z + C2 * z^2 + ... + C17 * z^17, with poly[i] holding Ci.
+ Note P is evaluated on even powers of x only. See tools/asinh.sollya for the
+ algorithm used to generate these coefficients. */
+const struct asinh_data __asinh_data
+ = {.poly
+ = {-0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
+ 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
+ -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
+ 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
+ -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
+ 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18}};
diff --git a/pl/math/asinhf_3u5.c b/pl/math/asinhf_3u5.c
new file mode 100644
index 0000000..2b2c55d
--- /dev/null
+++ b/pl/math/asinhf_3u5.c
@@ -0,0 +1,78 @@
+/*
+ * Single-precision asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "estrinf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffff) /* Clears the sign bit. */
+#define SqrtFltMax (0x1.749e96p+10f) /* ~1490.5. NOTE(review): far below sqrt(FLT_MAX) (~0x1p64); confirm name. */
+#define Ln2 (0x1.62e4p-1f) /* Approximates ln(2). */
+#define One (0x3f8) /* asuint(1.0f) >> 20. */
+#define ExpM12 (0x398) /* asuint(0x1p-12f) >> 20. */
+
+#define C(i) __asinhf_data.coeffs[i]
+
+float
+optr_aor_log_f32 (float);
+
+/* asinhf approximation using a variety of approaches on different intervals:
+
+ |x| < 2^-12: Return x. Function is exactly rounded in this region.
+
+ |x| < 1.0: Use custom order-8 polynomial. The largest observed
+ error in this region is 1.3ulps:
+ asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
+
+ |x| <= SqrtFltMax: Calculate the result directly using the
+ definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
+ observed error in this region is 1.99ulps.
+ asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
+
+ |x| > SqrtFltMax: We cannot square x without overflow at a low
+ cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+ even double x without overflow, so calculate this as ln(x) +
+ ln(2). The largest observed error in this region is 3.39ulps.
+ asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2. */
+float
+asinhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask; /* Bits of |x|. */
+ uint32_t ia12 = ia >> 20; /* Exponent field plus top 3 mantissa bits. */
+ float ax = asfloat (ia); /* |x|. */
+ uint32_t sign = ix & ~AbsMask; /* Sign bit of x, OR-ed back into results. */
+
+ if (unlikely (ia12 < ExpM12 || ia == 0x7f800000)) /* Tiny |x| or +-inf. */
+ return x;
+
+ if (unlikely (ia12 >= 0x7f8)) /* Exponent all ones, inf handled above: NaN. */
+ return __math_invalidf (x);
+
+ if (ia12 < One)
+ {
+ float x2 = ax * ax;
+ float p = ESTRIN_7 (ax, x2, x2 * x2, C);
+ float y = fmaf (x2, p, ax); /* |x| + |x|^2 * p. */
+ return asfloat (asuint (y) | sign);
+ }
+
+ if (unlikely (ax > SqrtFltMax)) /* Large |x|: ln(ax) + ln(2). */
+ {
+ return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
+ }
+
+ return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
+}
+
+PL_SIG (S, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (asinhf, 2.9)
+PL_TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
+PL_TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
+PL_TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
diff --git a/pl/math/asinhf_data.c b/pl/math/asinhf_data.c
new file mode 100644
index 0000000..cd1ef16
--- /dev/null
+++ b/pl/math/asinhf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients for single-precision asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinhf(x) directly in [2^-12, 1]. See tools/asinhf.sollya for
+ how these coeffs were generated. */
+const struct asinhf_data __asinhf_data
+ = {.coeffs
+ = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f,
+ 0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}};
diff --git a/pl/math/atan2_2u5.c b/pl/math/atan2_2u5.c
new file mode 100644
index 0000000..c909ac9
--- /dev/null
+++ b/pl/math/atan2_2u5.c
@@ -0,0 +1,159 @@
+/*
+ * Double-precision scalar atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdbool.h>
+
+#include "atan_common.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Pi (0x1.921fb54442d18p+1)
+#define PiOver2 (0x1.921fb54442d18p+0)
+#define PiOver4 (0x1.921fb54442d18p-1)
+#define SignMask (0x8000000000000000)
+#define ExpMask (0x7ff0000000000000)
+
+/* We calculate atan2 by P(n/d), where n and d are similar to the input
+ arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8,
+ which may underflow if n and d have very different magnitude.
+ POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n
+ and d for which P underflows, and is used to special-case such inputs. */
+#define POW8_EXP_UFLOW_BOUND 62
+
+/* Return the raw (biased) exponent field of F as a signed integer. The field
+   is taken as stored, so zero and subnormal inputs both yield 0.  */
+static inline int64_t
+biased_exponent (double f)
+{
+  uint64_t exp_field = asuint64 (f) & ExpMask;
+  return exp_field >> 52;
+}
+
+/* Fast implementation of scalar atan2 (y, x). NaN inputs are passed to
+ __math_invalid; zeros, infinities and pairs whose exponents differ by at
+ least POW8_EXP_UFLOW_BOUND are resolved by the special cases below, and
+ everything else goes through the shared atan polynomial (eval_poly).
+ Largest errors are when y and x are
+ close together. The greatest observed error is 2.28 ULP:
+ atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
+ got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
+double
+atan2 (double y, double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iy = asuint64 (y);
+
+ uint64_t sign_x = ix & SignMask;
+ uint64_t sign_y = iy & SignMask;
+
+ uint64_t iax = ix & ~SignMask;
+ uint64_t iay = iy & ~SignMask;
+
+ bool xisnan = isnan (x);
+ if (unlikely (isnan (y) && !xisnan))
+ return __math_invalid (y);
+ if (unlikely (xisnan))
+ return __math_invalid (x);
+
+ /* m = 2 * sign(x) + sign(y), i.e. bit 0 is the sign of y and bit 1 is the
+ sign of x, so m selects the quadrant in the switches below. */
+ uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2);
+
+ int64_t exp_diff = biased_exponent (x) - biased_exponent (y);
+
+ /* y = 0. */
+ if (iay == 0)
+ {
+ switch (m)
+ {
+ case 0:
+ case 1:
+ return y; /* atan(+-0,+anything)=+-0. */
+ case 2:
+ return Pi; /* atan(+0,-anything) = pi. */
+ case 3:
+ return -Pi; /* atan(-0,-anything) =-pi. */
+ }
+ }
+ /* Special case for (x, y) either on or very close to the y axis. Either x =
+ 0, or y is much larger than x (difference in exponents >=
+ POW8_EXP_UFLOW_BOUND). */
+ if (unlikely (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
+ return sign_y ? -PiOver2 : PiOver2;
+
+ /* Special case for either x is INF or (x, y) is very close to x axis and x is
+ negative. */
+ if (unlikely (iax == 0x7ff0000000000000
+ || (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
+ {
+ if (iay == 0x7ff0000000000000)
+ {
+ switch (m)
+ {
+ case 0:
+ return PiOver4; /* atan(+INF,+INF). */
+ case 1:
+ return -PiOver4; /* atan(-INF,+INF). */
+ case 2:
+ return 3.0 * PiOver4; /* atan(+INF,-INF). */
+ case 3:
+ return -3.0 * PiOver4; /* atan(-INF,-INF). */
+ }
+ }
+ else
+ {
+ switch (m)
+ {
+ case 0:
+ return 0.0; /* atan(+...,+INF). */
+ case 1:
+ return -0.0; /* atan(-...,+INF). */
+ case 2:
+ return Pi; /* atan(+...,-INF). */
+ case 3:
+ return -Pi; /* atan(-...,-INF). */
+ }
+ }
+ }
+ /* y is INF. */
+ if (iay == 0x7ff0000000000000)
+ return sign_y ? -PiOver2 : PiOver2;
+
+ uint64_t sign_xy = sign_x ^ sign_y;
+
+ double ax = asdouble (iax);
+ double ay = asdouble (iay);
+ uint64_t pred_aygtax = (ay > ax);
+
+ /* Set up z for call to atan: divide the smaller magnitude by the larger so
+ that |z| <= 1, negating the quotient when |y| > |x| to pair with the pi/2
+ shift applied below. */
+ double n = pred_aygtax ? -ax : ay;
+ double d = pred_aygtax ? ay : ax;
+ double z = n / d;
+
+ double ret;
+ if (unlikely (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND))
+ {
+ /* If (x, y) is very close to x axis and x is positive, the polynomial
+ will underflow and evaluate to z. */
+ ret = z;
+ }
+ else
+ {
+ /* Work out the correct shift. */
+ double shift = sign_x ? -2.0 : 0.0;
+ shift = pred_aygtax ? shift + 1.0 : shift;
+ shift *= PiOver2;
+
+ ret = eval_poly (z, z, shift);
+ }
+
+ /* Account for the sign of x and y: ret was computed from |x| and |y| (plus a
+ shift that depends on sign_x), so flip its sign bit when x and y have
+ opposite signs. */
+ return asdouble (asuint64 (ret) ^ sign_xy);
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (S, D, 2, atan2)
+PL_TEST_ULP (atan2, 1.78)
+PL_TEST_INTERVAL (atan2, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (atan2, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (atan2, 1e6, 1e32, 40000)
diff --git a/pl/math/atan2f_3u.c b/pl/math/atan2f_3u.c
new file mode 100644
index 0000000..38e1df5
--- /dev/null
+++ b/pl/math/atan2f_3u.c
@@ -0,0 +1,167 @@
+/*
+ * Single-precision scalar atan2f(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdbool.h>
+
+#include "atanf_common.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Pi (0x1.921fb6p+1f)
+#define PiOver2 (0x1.921fb6p+0f)
+#define PiOver4 (0x1.921fb6p-1f)
+#define SignMask (0x80000000)
+
+/* We calculate atan2f by P(n/d), where n and d are similar to the input
+ arguments, and P is a polynomial. The polynomial may underflow.
+ POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
+ for which P underflows, and is used to special-case such inputs. */
+#define POLY_UFLOW_BOUND 24
+
+/* Return the biased exponent of F, extended below the normal range.
+
+   Normal inputs (and Inf/NaN) yield the stored exponent field. A subnormal
+   yields minus the number of leading zeros in its mantissa, i.e. the
+   exponent it would have if the format had no lower limit; this ranges from
+   0 (largest subnormal) down to -22 (smallest subnormal). +-0 yields -32,
+   which is below every nonzero input.  */
+static inline int32_t
+biased_exponent (float f)
+{
+  uint32_t fi = asuint (f);
+  int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
+  if (unlikely (ex == 0))
+    {
+      /* Subnormal case - we still need to get the exponent right for
+	 subnormal numbers as division may take us back inside the normal
+	 range. Shifting by 9 drops the sign and exponent bits so only the
+	 mantissa is counted.  */
+      uint32_t mant = fi << 9;
+      /* __builtin_clz (0) is undefined, and atan2f calls this helper before
+	 its own zero checks, so +-0 does reach this point. Treat zero as
+	 having 32 leading zeros.  */
+      if (mant == 0)
+	return ex - 32;
+      return ex - __builtin_clz (mant);
+    }
+  return ex;
+}
+
+/* Fast implementation of scalar atan2f (y, x). NaN inputs return x + y;
+ zeros, infinities and pairs whose exponents differ by at least
+ POLY_UFLOW_BOUND are resolved by the special cases below, and everything
+ else goes through the shared atanf polynomial (eval_poly).
+ Largest observed error is
+ 2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
+ atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
+ want 0x1.964640p-1. */
+float
+atan2f (float y, float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iy = asuint (y);
+
+ uint32_t sign_x = ix & SignMask;
+ uint32_t sign_y = iy & SignMask;
+
+ uint32_t iax = ix & ~SignMask;
+ uint32_t iay = iy & ~SignMask;
+
+ /* x or y is NaN: x + y propagates the NaN operand. */
+ if ((iax > 0x7f800000) || (iay > 0x7f800000))
+ return x + y;
+
+ /* m = 2 * sign(x) + sign(y), i.e. bit 0 is the sign of y and bit 1 is the
+ sign of x, so m selects the quadrant in the switches below. */
+ uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
+
+ /* The following follows glibc ieee754 implementation, except
+ that we do not use +-tiny shifts (non-nearest rounding mode). */
+
+ int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
+
+ /* Special case for (x, y) either on or very close to the x axis. Either y =
+ 0, or y is tiny and x is huge (difference in exponents >=
+ POLY_UFLOW_BOUND). In the second case, we only want to use this special
+ case when x is negative (i.e. quadrants 2 or 3). */
+ if (unlikely (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
+ {
+ switch (m)
+ {
+ case 0:
+ case 1:
+ return y; /* atan(+-0,+anything)=+-0. */
+ case 2:
+ return Pi; /* atan(+0,-anything) = pi. */
+ case 3:
+ return -Pi; /* atan(-0,-anything) =-pi. */
+ }
+ }
+ /* Special case for (x, y) either on or very close to the y axis. Either x =
+ 0, or x is tiny and y is huge (difference in exponents >=
+ POLY_UFLOW_BOUND). */
+ if (unlikely (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
+ return sign_y ? -PiOver2 : PiOver2;
+
+ /* x is INF. */
+ if (iax == 0x7f800000)
+ {
+ if (iay == 0x7f800000)
+ {
+ switch (m)
+ {
+ case 0:
+ return PiOver4; /* atan(+INF,+INF). */
+ case 1:
+ return -PiOver4; /* atan(-INF,+INF). */
+ case 2:
+ return 3.0f * PiOver4; /* atan(+INF,-INF). */
+ case 3:
+ return -3.0f * PiOver4; /* atan(-INF,-INF). */
+ }
+ }
+ else
+ {
+ switch (m)
+ {
+ case 0:
+ return 0.0f; /* atan(+...,+INF). */
+ case 1:
+ return -0.0f; /* atan(-...,+INF). */
+ case 2:
+ return Pi; /* atan(+...,-INF). */
+ case 3:
+ return -Pi; /* atan(-...,-INF). */
+ }
+ }
+ }
+ /* y is INF. */
+ if (iay == 0x7f800000)
+ return sign_y ? -PiOver2 : PiOver2;
+
+ uint32_t sign_xy = sign_x ^ sign_y;
+
+ float ax = asfloat (iax);
+ float ay = asfloat (iay);
+
+ bool pred_aygtax = (ay > ax);
+
+ /* Set up z for call to atanf: divide the smaller magnitude by the larger so
+ that |z| <= 1, negating the quotient when |y| > |x| to pair with the pi/2
+ shift applied below. */
+ float n = pred_aygtax ? -ax : ay;
+ float d = pred_aygtax ? ay : ax;
+ float z = n / d;
+
+ float ret;
+ if (unlikely (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
+ {
+ /* If (x, y) is very close to x axis and x is positive, the polynomial
+ will underflow and evaluate to z. */
+ ret = z;
+ }
+ else
+ {
+ /* Work out the correct shift. */
+ float shift = sign_x ? -2.0f : 0.0f;
+ shift = pred_aygtax ? shift + 1.0f : shift;
+ shift *= PiOver2;
+
+ ret = eval_poly (z, z, shift);
+ }
+
+ /* Account for the sign of x and y: ret was computed from |x| and |y| (plus a
+ shift that depends on sign_x), so flip its sign bit when x and y have
+ opposite signs. */
+ return asfloat (asuint (ret) ^ sign_xy);
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (S, F, 2, atan2)
+PL_TEST_ULP (atan2f, 2.4)
+PL_TEST_INTERVAL (atan2f, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (atan2f, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2f, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2f, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (atan2f, 1e6, 1e32, 40000)
diff --git a/pl/math/atan_2u5.c b/pl/math/atan_2u5.c
new file mode 100644
index 0000000..ee47701
--- /dev/null
+++ b/pl/math/atan_2u5.c
@@ -0,0 +1,73 @@
+/*
+ * Double-precision atan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "atan_common.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define PiOver2 0x1.921fb54442d18p+0
+#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */
+#define BigBound 0x434 /* top12(asuint64(0x1p53)). */
+#define OneTop 0x3ff
+
+/* Fast implementation of double-precision atan.
+   Based on atan(x) ~ shift + z + z^3 * P(z^2), with inputs of magnitude >= 1
+   reduced to [0,1] using z=-1/x and shift = pi/2. The result is computed for
+   |x| and the sign of x is applied at the end.
+   Maximum observed error is 2.27 ulps:
+   atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+			      want 0x1.9225645bdd7c3p-1.  */
+double
+atan (double x)
+{
+  uint64_t bits = asuint64 (x);
+  uint64_t abs_bits = bits & AbsMask;
+  uint64_t sign_bit = bits & ~AbsMask;
+  uint32_t top12 = abs_bits >> 52;
+
+  /* Tiny input: avoid underflow by returning x.  */
+  if (unlikely (top12 < TinyBound))
+    return x;
+
+  if (unlikely (top12 >= BigBound))
+    {
+      /* Propagate NaN.  */
+      if (abs_bits > 0x7ff0000000000000)
+	return __math_invalid (x);
+      /* atan(x) rounds to PiOver2 for large x; give it the sign of x.  */
+      return asdouble (asuint64 (PiOver2) ^ sign_bit);
+    }
+
+  /* For |x| >= 1 use atan(x) = pi / 2 + atan(-1 / x); otherwise approximate
+     atan(x) directly. az feeds the odd powers of the polynomial: it is |x|
+     in the direct case and -|z| in the reduced case, so the combined result
+     is the value for |x| in both.  */
+  int reduce = top12 >= OneTop;
+  double z = reduce ? -1.0 / x : x;
+  double az = reduce ? -fabs (z) : asdouble (abs_bits);
+  double shift = reduce ? PiOver2 : 0;
+
+  /* Evaluate shift + z + z^3 * P(z^2), then copy the sign of x.  */
+  return asdouble (asuint64 (eval_poly (z, az, shift)) ^ sign_bit);
+}
+
+PL_SIG (S, D, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (atan, 1.78)
+PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000)
diff --git a/pl/math/atan_common.h b/pl/math/atan_common.h
new file mode 100644
index 0000000..da0da64
--- /dev/null
+++ b/pl/math/atan_common.h
@@ -0,0 +1,49 @@
+/*
+ * Double-precision polynomial evaluation function for scalar and vector atan(x)
+ * and atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "estrin.h"
+
+#if V_SUPPORTED
+
+#include "v_math.h"
+
+#define DBL_T v_f64_t
+#define P(i) v_f64 (__atan_poly_data.poly[i])
+
+#else
+
+#define DBL_T double
+#define P(i) __atan_poly_data.poly[i]
+
+#endif
+
+/* Polynomial used in fast atan(x) and atan2(y,x) implementations.
+   The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
+   Returns shift + az + az * z^2 * P(z^2); z is only ever squared, so az
+   carries the sign and magnitude used for the odd powers.  */
+static inline DBL_T
+eval_poly (DBL_T z, DBL_T az, DBL_T shift)
+{
+  /* Powers of w = z^2 needed by the Estrin scheme.  */
+  DBL_T w = z * z;
+  DBL_T w2 = w * w;
+  DBL_T w4 = w2 * w2;
+  DBL_T w8 = w4 * w4;
+
+  /* Split Estrin scheme for P(w) with deg(P)=19: coefficients 0..7 form the
+     low half and coefficients 8..19 the high half, joined by one FMA with
+     w^8. Splitting instead of using the full scheme avoids underflow in
+     w^16.  */
+  DBL_T p_lo = ESTRIN_7 (w, w2, w4, P);
+  DBL_T p_hi = ESTRIN_11_ (w, w2, w4, w8, P, 8);
+  DBL_T p = FMA (p_hi, w8, p_lo);
+
+  /* Finalize: shift + az + az * z^2 * P(z^2).  */
+  return FMA (p, w * az, az) + shift;
+}
+
+#undef DBL_T
+#undef FMA
+#undef P
diff --git a/pl/math/atan_data.c b/pl/math/atan_data.c
new file mode 100644
index 0000000..91d0f61
--- /dev/null
+++ b/pl/math/atan_data.c
@@ -0,0 +1,20 @@
+/*
+ * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct atan_poly_data __atan_poly_data = {
+ .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-1022, 1.0]. See atan.sollya for details of how these were
+ generated. The 20 entries are stored lowest order first; the
+ leading values are close to -1/3, 1/5, -1/7, ... */
+ -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
+ 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
+ -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
+ 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
+ -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
+ 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
+ -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
diff --git a/pl/math/atanf_2u9.c b/pl/math/atanf_2u9.c
new file mode 100644
index 0000000..9d17f25
--- /dev/null
+++ b/pl/math/atanf_2u9.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision atan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "atanf_common.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define PiOver2 0x1.921fb6p+0f
+#define AbsMask 0x7fffffff
+#define TinyBound 0x30800000 /* asuint(0x1p-30). */
+#define BigBound 0x4e800000 /* asuint(0x1p30). */
+#define One 0x3f800000
+
+/* Approximation of single-precision atan(x) based on
+   atan(x) ~ shift + z + z^3 * P(z^2), with inputs of magnitude > 1 reduced
+   to [0,1] using z=-1/x and shift = pi/2. The result is computed for |x| and
+   the sign of x is applied at the end.
+   Maximum error is 2.88 ulps:
+   atanf(0x1.0565ccp+0) got 0x1.97771p-1
+		       want 0x1.97770ap-1.  */
+float
+atanf (float x)
+{
+  uint32_t bits = asuint (x);
+  uint32_t abs_bits = bits & AbsMask;
+  uint32_t sign_bit = bits & ~AbsMask;
+
+  /* Tiny input: avoid underflow by returning x.  */
+  if (unlikely (abs_bits < TinyBound))
+    return x;
+
+  /* Above BigBound: propagate NaN, otherwise atan(x) rounds to PiOver2 and
+     takes the sign of x.  */
+  if (unlikely (abs_bits > BigBound))
+    return abs_bits > 0x7f800000 ? __math_invalidf (x)
+				 : asfloat (asuint (PiOver2) ^ sign_bit);
+
+  /* For |x| > 1 use atan(x) = pi / 2 + atan(-1 / x); otherwise approximate
+     atan(x) directly. az feeds the odd powers of the polynomial: it is |x|
+     in the direct case and -|z| in the reduced case.  */
+  int reduce = abs_bits > One;
+  float z = reduce ? -1.0f / x : x;
+  float az = reduce ? -fabsf (z) : asfloat (abs_bits);
+  float shift = reduce ? PiOver2 : 0;
+
+  /* Evaluate shift + z + z^3 * P(z^2), then copy the sign of x.  */
+  return asfloat (asuint (eval_poly (z, az, shift)) ^ sign_bit);
+}
+
+PL_SIG (S, F, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (atanf, 2.38)
+PL_TEST_INTERVAL (atanf, 0, 0x1p-30, 5000)
+PL_TEST_INTERVAL (atanf, -0, -0x1p-30, 5000)
+PL_TEST_INTERVAL (atanf, 0x1p-30, 1, 40000)
+PL_TEST_INTERVAL (atanf, -0x1p-30, -1, 40000)
+PL_TEST_INTERVAL (atanf, 1, 0x1p30, 40000)
+PL_TEST_INTERVAL (atanf, -1, -0x1p30, 40000)
+PL_TEST_INTERVAL (atanf, 0x1p30, inf, 1000)
+PL_TEST_INTERVAL (atanf, -0x1p30, -inf, 1000)
diff --git a/pl/math/atanf_common.h b/pl/math/atanf_common.h
new file mode 100644
index 0000000..37ca76d
--- /dev/null
+++ b/pl/math/atanf_common.h
@@ -0,0 +1,51 @@
+/*
+ * Single-precision polynomial evaluation function for scalar and vector
+ * atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_ATANF_COMMON_H
+#define PL_MATH_ATANF_COMMON_H
+
+#include "math_config.h"
+#include "estrinf.h"
+
+#if V_SUPPORTED
+
+#include "v_math.h"
+
+#define FLT_T v_f32_t
+#define P(i) v_f32 (__atanf_poly_data.poly[i])
+
+#else
+
+#define FLT_T float
+#define P(i) __atanf_poly_data.poly[i]
+
+#endif
+
+/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations.
+   The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
+   Returns shift + az + az * z^2 * P(z^2); z is only ever squared, so az
+   carries the sign and magnitude used for the odd powers.  */
+static inline FLT_T
+eval_poly (FLT_T z, FLT_T az, FLT_T shift)
+{
+  FLT_T z2 = z * z;
+  FLT_T z4 = z2 * z2;
+
+  /* 2-level Estrin scheme for P(z^2) with deg(P)=7. A standard
+     implementation would multiply the upper half by z^8 in the final fma,
+     which creates spurious underflow when z^8 is small enough. Instead the
+     upper half is first multiplied by z^4 and the final fma uses z^4 again.
+     Horner and single-level Estrin have higher errors that exceed the
+     threshold.  */
+  FLT_T upper = z4 * ESTRIN_3_ (z2, z4, P, 4);
+  FLT_T lower = ESTRIN_3 (z2, z4, P);
+  FLT_T p = FMA (z4, upper, lower);
+
+  /* Finalize: shift + az + az * z^2 * P(z^2).  */
+  return FMA (p, z2 * az, az) + shift;
+}
+
+#endif // PL_MATH_ATANF_COMMON_H
diff --git a/pl/math/atanf_data.c b/pl/math/atanf_data.c
new file mode 100644
index 0000000..c4cba23
--- /dev/null
+++ b/pl/math/atanf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
+ The 8 entries are stored lowest order first; the leading values are close
+ to -1/3, 1/5, -1/7, ... */
+const struct atanf_poly_data __atanf_poly_data = {
+ .poly = {/* See atanf.sollya for details of how these were generated. */
+ -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
+ -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
diff --git a/pl/math/atanh_3u.c b/pl/math/atanh_3u.c
new file mode 100644
index 0000000..a168cd5
--- /dev/null
+++ b/pl/math/atanh_3u.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define One 0x3ff0000000000000
+#define Ln2Hi 0x1.62e42fefa3800p-1
+#define Ln2Lo 0x1.ef35793c76730p-45
+#define OneMHfRt2Top \
+ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */
+#define OneTop12 0x3ff
+#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */
+#define BottomMask 0xffffffff
+#define C(i) __log1p_data.coeffs[i]
+
+/* Helper for calculating log(1 + x) using an order-18 polynomial on a reduced
+   interval. Copied from log1p_2u.c, with no special-case handling. See that
+   file for details of the algorithm.  */
+static inline double
+log1p_inline (double x)
+{
+  double xp1 = x + 1;
+  uint64_t xp1_bits = asuint64 (xp1);
+
+  /* Decompose x + 1 into (f + 1) * 2^k, with k chosen such that f + 1 is in
+     [sqrt(2)/2, sqrt(2)]. Only the top 32 bits take part in the exponent
+     arithmetic; the low 32 bits are carried over unchanged.  */
+  uint32_t top = (xp1_bits >> 32) + OneMHfRt2Top;
+  int32_t k = (int32_t) (top >> 20) - OneTop12;
+  uint32_t top_red = (top & 0x000fffff) + HfRt2Top;
+  uint64_t bits_red = ((uint64_t) top_red << 32) | (xp1_bits & BottomMask);
+  double f = asdouble (bits_red) - 1;
+
+  /* Correction term for the round-off incurred when forming x + 1.  */
+  double corr = (x - (xp1 - 1)) / xp1;
+
+  /* Approximate log1p(f) with polynomial: f + f^2 * P(f).  */
+  double f2 = f * f;
+  double f4 = f2 * f2;
+  double f8 = f4 * f4;
+  double poly_f = ESTRIN_18 (f, f2, f4, f8, f8 * f8, C) * f;
+  double log1p_f = fma (f, poly_f, f);
+
+  /* Recombine log1p(x) = k*log2 + log1p(f) + c/m, with log2 split into high
+     and low parts.  */
+  double kd = k;
+  double small = fma (Ln2Lo, kd, corr);
+  return small + fma (Ln2Hi, kd, log1p_f);
+}
+
+/* Approximation for double-precision inverse tanh(x), using a simplified
+   version of log1p:
+     atanh(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
+   |x| == 1 is reported through __math_divzero and |x| > 1 (including NaN)
+   through __math_invalid.
+   Greatest observed error is 3.00 ULP:
+   atanh(0x1.e58f3c108d714p-4) got 0x1.e7da77672a647p-4
+			      want 0x1.e7da77672a64ap-4.  */
+double
+atanh (double x)
+{
+  uint64_t bits = asuint64 (x);
+  uint64_t abs_bits = bits & AbsMask;
+  uint64_t sign_bit = bits & ~AbsMask;
+
+  if (unlikely (abs_bits >= One))
+    {
+      /* The sign bit is moved into the low 32 bits before being passed on;
+	 presumably the helper takes a 32-bit sign flag - confirm against
+	 math_config.h.  */
+      if (abs_bits == One)
+	return __math_divzero (sign_bit >> 32);
+      return __math_invalid (x);
+    }
+
+  /* +-0.5: applies both the factor of one half and the sign of x.  */
+  double signed_half = asdouble (Half | sign_bit);
+  double a = asdouble (abs_bits);
+  return signed_half * log1p_inline ((2 * a) / (1 - a));
+}
+
+PL_SIG (S, D, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (atanh, 3.00)
+PL_TEST_INTERVAL (atanh, 0, 0x1p-23, 10000)
+PL_TEST_INTERVAL (atanh, -0, -0x1p-23, 10000)
+PL_TEST_INTERVAL (atanh, 0x1p-23, 1, 90000)
+PL_TEST_INTERVAL (atanh, -0x1p-23, -1, 90000)
+PL_TEST_INTERVAL (atanh, 1, inf, 100)
+PL_TEST_INTERVAL (atanh, -1, -inf, 100)
diff --git a/pl/math/atanhf_3u1.c b/pl/math/atanhf_3u1.c
new file mode 100644
index 0000000..fb90aa2
--- /dev/null
+++ b/pl/math/atanhf_3u1.c
@@ -0,0 +1,88 @@
+/*
+ * Single-precision atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define Four 0x40800000
+#define Ln2 0x1.62e43p-1f
+#define TinyBound 0x39800000 /* 0x1p-12, below which atanhf(x) rounds to x. */
+
+#define C(i) __log1pf_data.coeffs[i]
+
+/* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme. The value
+   computed is m + C(0) m^2 + C(1) m^3 + ... + C(8) m^10.  */
+static inline float
+eval_poly (float m)
+{
+  float m2 = m * m;
+  float m4 = m2 * m2;
+
+  /* Level 1: adjacent coefficient pairs.  */
+  float c01 = fmaf (m, C (1), C (0));
+  float c23 = fmaf (m, C (3), C (2));
+  float c45 = fmaf (m, C (5), C (4));
+  float c67 = fmaf (m, C (7), C (6));
+
+  /* Level 2: fold pairs together with m^2; the leading linear term m is
+     absorbed into the lowest group.  */
+  float low = fmaf (m2, c01, m);
+  float mid = fmaf (m2, c45, c23);
+  float high = fmaf (m2, C (8), c67);
+
+  /* Level 3: fold with m^4. The top group is scaled by m^4 twice rather than
+     by a separately computed m^8.  */
+  float low_mid = fmaf (m4, mid, low);
+  return fmaf (m4 * high, m4, low_mid);
+}
+
+/* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
+   special-case handling. See that file for details of the algorithm.  */
+static inline float
+log1pf_inline (float x)
+{
+  float xp1 = x + 1.0f;
+  /* Exponent of x + 1 relative to 0.75 (0x3f400000), kept in the position of
+     the float exponent field so it can be subtracted from bit patterns.  */
+  int k = (asuint (xp1) - 0x3f400000) & 0xff800000;
+  /* 4 scaled down by that exponent, so 0.25 * s - 1 below is the scaled
+     "+ 1" term minus one.  */
+  float s = asfloat (Four - k);
+  /* Scale x by the same power of two and add the term above, giving the
+     reduced polynomial argument without ever rounding x + 1.  */
+  float reduced = asfloat (asuint (x) - k) + fmaf (0.25f, s, -1.0f);
+  /* k * 2^-23 is the integer exponent; add exponent * ln2 to the polynomial
+     result.  */
+  return fmaf ((float) k * 0x1.0p-23f, Ln2, eval_poly (reduced));
+}
+
+/* Approximation for single-precision inverse tanh(x), using a simplified
+   version of log1p:
+     atanhf(x) = sign(x) * 0.5 * log1p (2|x| / (1 - |x|)).
+   Inputs with |x| < 0x1p-12 are returned unchanged, |x| == 1 is reported as
+   a division by zero and |x| > 1 (including NaN) as invalid.
+   Maximum error is 3.08 ULP:
+   atanhf(0x1.ff0d5p-5) got 0x1.ffb768p-5
+		       want 0x1.ffb76ep-5.  */
+float
+atanhf (float x)
+{
+  uint32_t ix = asuint (x);
+  uint32_t iax = ix & AbsMask;
+  uint32_t sign = ix & ~AbsMask;
+
+  /* Below TinyBound atanhf(x) rounds to x.  */
+  if (unlikely (iax < TinyBound))
+    return x;
+
+  /* Pole at +-1. Use the single-precision error helper so the whole routine
+     stays in float, matching __math_invalidf below.  */
+  if (unlikely (iax == One))
+    return __math_divzerof (sign);
+
+  if (unlikely (iax > One))
+    return __math_invalidf (x);
+
+  /* +-0.5: applies both the factor of one half and the sign of x.  */
+  float halfsign = asfloat (Half | sign);
+  float ax = asfloat (iax);
+  return halfsign * log1pf_inline ((2 * ax) / (1 - ax));
+}
+
+PL_SIG (S, F, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (atanhf, 2.59)
+PL_TEST_INTERVAL (atanhf, 0, 0x1p-12, 500)
+PL_TEST_INTERVAL (atanhf, 0x1p-12, 1, 200000)
+PL_TEST_INTERVAL (atanhf, 1, inf, 1000)
+PL_TEST_INTERVAL (atanhf, -0, -0x1p-12, 500)
+PL_TEST_INTERVAL (atanhf, -0x1p-12, -1, 200000)
+PL_TEST_INTERVAL (atanhf, -1, -inf, 1000)
diff --git a/pl/math/cbrt_2u.c b/pl/math/cbrt_2u.c
new file mode 100644
index 0000000..83715dd
--- /dev/null
+++ b/pl/math/cbrt_2u.c
@@ -0,0 +1,70 @@
+/*
+ * Double-precision cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+PL_SIG (S, D, 1, cbrt, -10.0, 10.0)
+
+#define AbsMask 0x7fffffffffffffff
+#define TwoThirds 0x1.5555555555555p-1
+
+#define C(i) __cbrt_data.poly[i]
+#define T(i) __cbrt_data.table[i]
+
+/* Approximation for double-precision cbrt(x), using low-order polynomial and
+   two Newton iterations. +-0 and +-Inf are returned unchanged.
+   Greatest observed error is 1.79 ULP. Errors repeat
+   according to the exponent, for instance an error observed for double value
+   m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
+   integer.
+   cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+			     want 0x1.965fe72821e99p+0.  */
+double
+cbrt (double x)
+{
+  uint64_t ix = asuint64 (x);
+  uint64_t iax = ix & AbsMask;
+  uint64_t sign = ix & ~AbsMask;
+
+  /* 0x7ff0000000000000 is the binary64 encoding of +Inf. Without this early
+     return an infinite input reaches frexp and the Newton steps, where
+     Inf / Inf produces NaN.  */
+  if (unlikely (iax == 0 || iax == 0x7ff0000000000000))
+    return x;
+
+  /* |x| = m * 2^e, where m is in [0.5, 1.0].
+     We can easily decompose x into m and e using frexp.  */
+  int e;
+  double m = frexp (asdouble (iax), &e);
+
+  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
+     for Newton iterations: p = C0 + C1 m + C2 m^2 + C3 m^3.  */
+  double p_01 = fma (C (1), m, C (0));
+  double p_23 = fma (C (3), m, C (2));
+  double p = fma (p_23, m * m, p_01);
+
+  /* Two iterations of Newton's method for iteratively approximating cbrt:
+     a_next = (2/3) * a + (m/3) / a^2.  */
+  double m_by_3 = m / 3;
+  double a = fma (TwoThirds, p, m_by_3 / (p * p));
+  a = fma (TwoThirds, a, m_by_3 / (a * a));
+
+  /* Assemble the result by the following:
+
+     cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+     Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)).
+
+     Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3.
+     i is an integer in [-2, 2], so t can be looked up in the table T.
+     Hence the result is assembled as:
+
+     cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.
+     Which can be done easily using ldexp.  */
+  return asdouble (asuint64 (ldexp (a * T (2 + e % 3), e / 3)) | sign);
+}
+
+PL_TEST_ULP (cbrt, 1.30)
+PL_TEST_INTERVAL (cbrt, 0, inf, 1000000)
+PL_TEST_INTERVAL (cbrt, -0, -inf, 1000000)
diff --git a/pl/math/cbrt_data.c b/pl/math/cbrt_data.c
new file mode 100644
index 0000000..3d484c2
--- /dev/null
+++ b/pl/math/cbrt_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients and table entries for double-precision cbrt(x).
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct cbrt_data __cbrt_data
+ = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
+ See cbrt.sollya for details of generation. Stored lowest order
+ first: cbrt evaluates poly[0] + poly[1] m + poly[2] m^2 + poly[3] m^3. */
+ 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3},
+ .table = { /* table[i] = 2^((i - 2) / 3). cbrt indexes it with 2 + e % 3,
+ where e % 3 is in [-2, 2]. */
+ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}};
diff --git a/pl/math/cbrtf_1u5.c b/pl/math/cbrtf_1u5.c
new file mode 100644
index 0000000..adc5917
--- /dev/null
+++ b/pl/math/cbrtf_1u5.c
@@ -0,0 +1,67 @@
+/*
+ * Single-precision cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "estrinf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define SignMask 0x80000000
+#define TwoThirds 0x1.555556p-1f
+
+#define C(i) __cbrtf_data.poly[i]
+#define T(i) __cbrtf_data.table[i]
+
+/* Approximation for single-precision cbrt(x), using low-order polynomial and
+   one Newton iteration on a reduced interval. +-0 and +-Inf are returned
+   unchanged. Greatest error is 1.5 ULP. This is observed for every value
+   where the mantissa is 0x1.81410e and the exponent is a multiple of 3, for
+   example:
+   cbrtf(0x1.81410ep+30) got 0x1.255d96p+10
+			want 0x1.255d92p+10.  */
+float
+cbrtf (float x)
+{
+  uint32_t bits = asuint (x);
+  uint32_t abs_bits = bits & AbsMask;
+  uint32_t sign_bit = bits & SignMask;
+
+  if (unlikely (abs_bits == 0 || abs_bits == 0x7f800000))
+    return x;
+
+  /* Split |x| into m * 2^e with m in [0.5, 1.0] using frexpf.  */
+  int e;
+  float m = frexpf (asfloat (abs_bits), &e);
+
+  /* Rough starting estimate of cbrt(m) from a 4-coefficient polynomial. The
+     better this is, the less accurate the next stage needs to be; it is good
+     enough for a single Newton iteration.  */
+  float guess = ESTRIN_3 (m, m * m, C);
+
+  /* One Newton step: a = (2/3) * guess + (m/3) / guess^2.  */
+  float third_m = m / 3;
+  float a = fmaf (TwoThirds, guess, third_m / (guess * guess));
+
+  /* cbrt(x) = cbrt(m) * 2^(e / 3). Write e = 3 * q + r using C integer
+     division, so r is in [-2, 2]. The factor 2^(r / 3) comes from table T,
+     whose entry i holds 2^((i - 2) / 3), and the factor 2^q is applied by
+     ldexpf. The sign of x is OR-ed back in last.  */
+  int q = e / 3;
+  int r = e % 3;
+  float scaled = ldexpf (a * T (2 + r), q);
+  return asfloat (asuint (scaled) | sign_bit);
+}
+
+PL_SIG (S, F, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (cbrtf, 1.03)
+PL_TEST_INTERVAL (cbrtf, 0, inf, 1000000)
+PL_TEST_INTERVAL (cbrtf, -0, -inf, 1000000)
diff --git a/pl/math/cbrtf_data.c b/pl/math/cbrtf_data.c
new file mode 100644
index 0000000..c6cdb4d
--- /dev/null
+++ b/pl/math/cbrtf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients and table entries for single-precision cbrt(x).
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct cbrtf_data __cbrtf_data
+ = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
+ See cbrtf.sollya for details of generation. */
+ 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3},
+ .table = { /* table[i] = 2^((i - 2) / 3). cbrtf indexes it with 2 + e % 3,
+ where e % 3 is in [-2, 2]. */
+ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0}};
diff --git a/pl/math/cosh_2u.c b/pl/math/cosh_2u.c
new file mode 100644
index 0000000..5d1df07
--- /dev/null
+++ b/pl/math/cosh_2u.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define SpecialBound \
+ 0x40861da04cbafe44 /* 0x1.61da04cbafe44p+9, above which exp overflows. */
+
+double
+__exp_dd (double, double);
+
+/* Slow path for cosh: handles Inf, NaN and finite |x| >= SpecialBound. IAX
+   is the bit pattern of |x|.  */
+static double
+specialcase (double x, uint64_t iax)
+{
+  /* Inf maps to +Inf; anything above the Inf encoding is a NaN.  */
+  if (iax >= 0x7ff0000000000000)
+    return iax == 0x7ff0000000000000 ? INFINITY : __math_invalid (x);
+
+  /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated
+     by exp(|x|), so approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2, halving
+     before the second multiply.  */
+  double half_exp = __exp_dd (asdouble (iax) / 2, 0);
+  return (0.5 * half_exp) * half_exp;
+}
+
+/* Approximation for double-precision cosh(x).
+   cosh(x) = (exp(x) + exp(-x)) / 2.
+   The greatest observed error is in the special region, 1.93 ULP:
+   cosh(0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021
+			     want 0x1.fdf28623ef923p+1021.
+
+   The greatest observed error in the non-special region is 1.03 ULP:
+   cosh(0x1.502cd8e56ab3bp+0) got 0x1.fe54962842d0ep+0
+			     want 0x1.fe54962842d0fp+0.  */
+double
+cosh (double x)
+{
+  /* cosh is even, so only |x| matters from here on.  */
+  uint64_t abs_bits = asuint64 (x) & AbsMask;
+
+  /* exp overflows a little bit before cosh, so the special-case handler
+     covers that gap as well as Inf and NaN.  */
+  if (unlikely (abs_bits >= SpecialBound))
+    return specialcase (x, abs_bits);
+
+  /* With t = exp(|x|) from the double-precision exp helper:
+     cosh(x) = t / 2 + 1 / (2 * t).  */
+  double t = __exp_dd (asdouble (abs_bits), 0);
+  return 0.5 * t + 0.5 / t;
+}
+
+PL_SIG (S, D, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (cosh, 1.43)
+PL_TEST_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000)
+PL_TEST_INTERVAL (cosh, -0, -0x1.61da04cbafe44p+9, 100000)
+PL_TEST_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000)
+PL_TEST_INTERVAL (cosh, -0x1.61da04cbafe44p+9, -0x1p10, 1000)
+PL_TEST_INTERVAL (cosh, 0x1p10, inf, 100)
+PL_TEST_INTERVAL (cosh, -0x1p10, -inf, 100)
diff --git a/pl/math/coshf_1u9.c b/pl/math/coshf_1u9.c
new file mode 100644
index 0000000..c125c92
--- /dev/null
+++ b/pl/math/coshf_1u9.c
@@ -0,0 +1,71 @@
+/*
+ * Single-precision cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */
+#define SpecialBound \
+ 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \
+ special case. */
+
+float
+optr_aor_exp_f32 (float);
+
+static NOINLINE float
+specialcase (float x, uint32_t iax)
+{
+ if (iax == 0x7f800000) /* |x| is infinity. */
+ return INFINITY;
+ if (iax > 0x7f800000) /* x is NaN. */
+ return __math_invalidf (x);
+ if (iax <= TinyBound)
+ /* For tiny x, avoid underflow by just returning 1. */
+ return 1;
+ /* Otherwise SpecialBound <= |x| < Inf. x is too large to calculate exp(x)
+ without overflow, so use exp(|x|/2) instead. For large x cosh(x) is
+ dominated by exp(|x|), so return:
+ cosh(x) ~= (exp(|x|/2))^2 / 2. */
+ float t = optr_aor_exp_f32 (asfloat (iax) / 2);
+ return (0.5 * t) * t;
+}
+
+/* Approximation for single-precision cosh(x) using exp.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The maximum error is 1.89 ULP, observed for |x| > SpecialBound:
+ coshf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127.
+ The maximum error observed for TinyBound < |x| < SpecialBound is 1.02 ULP:
+ coshf(0x1.50a3cp+0) got 0x1.ff21dcp+0 want 0x1.ff21dap+0. */
+float
+coshf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ float ax = asfloat (iax);
+
+ if (unlikely (iax <= TinyBound || iax >= SpecialBound))
+ {
+ /* x is tiny, large or special. */
+ return specialcase (x, iax);
+ }
+
+ /* Compute cosh from exp(|x|) only, forming exp(-|x|) as its reciprocal:
+ coshf(x) = exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ float t = optr_aor_exp_f32 (ax);
+ return 0.5f * t + 0.5f / t;
+}
+
+PL_SIG (S, F, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (coshf, 1.89)
+PL_TEST_INTERVAL (coshf, 0, 0x1p-63, 100)
+PL_TEST_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000)
+PL_TEST_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000)
+PL_TEST_INTERVAL (coshf, -0, -0x1p-63, 100)
+PL_TEST_INTERVAL (coshf, -0, -0x1.5a92d8p+6, 80000)
+PL_TEST_INTERVAL (coshf, -0x1.5a92d8p+6, -inf, 2000)
diff --git a/pl/math/erfc_4u5.c b/pl/math/erfc_4u5.c
new file mode 100644
index 0000000..e9af9d3
--- /dev/null
+++ b/pl/math/erfc_4u5.c
@@ -0,0 +1,155 @@
+/*
+ * Double-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pairwise_horner.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffffffffffff)
+
+#define xint __erfc_data.interval_bounds
+#define PX __erfc_data.poly
+
+/* Accurate exponential from optimized routines. */
+double
+__exp_dd (double x, double xtail);
+
+static inline double
+eval_poly_horner (double z, int i)
+{
+ double z2 = z * z;
+#define C(j) PX[i][j] /* Coefficient j of the polynomial for interval i. */
+ return PAIRWISE_HORNER_12 (z, z2, C);
+}
+
+/* Accurate evaluation of exp(-a^2)
+ using compensated product (a^2 ~ a*a - e2)
+ and the __exp_dd(y,d) routine, that is the
+ computation of exp(y+d) with a small correction d<<y. */
+static inline double
+eval_accurate_gaussian (double a)
+{
+ double e2;
+ double a2 = a * a;
+ double aa1 = -fma (0x1.0000002p27, a, -a);
+ aa1 = fma (0x1.0000002p27, a, aa1);
+ double aa2 = a - aa1; /* Split a ~= aa1 + aa2. */
+ e2 = fma (-aa1, aa1, a2);
+ e2 = fma (-aa1, aa2, e2);
+ e2 = fma (-aa2, aa1, e2);
+ e2 = fma (-aa2, aa2, e2); /* e2 ~= a2 - (aa1 + aa2)^2. */
+ return __exp_dd (-a2, e2);
+}
+
+/* Approximation of erfc for 6.0 <= x < 28: p(x - xint[i]) * exp(-x^2). */
+static inline double
+approx_erfc_hi (double x, int i)
+{
+ double a = fabs (x);
+ double z = a - xint[i];
+ double p = eval_poly_horner (z, i);
+ double e_mx2 = eval_accurate_gaussian (a);
+ return p * e_mx2;
+}
+
+static inline int
+get_itv_idx (double x)
+{
+ /* Interval bounds are a logarithmic scale, i.e. interval n has
+ lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain
+ the interval index. */
+ double a = asdouble (asuint64 (x) & AbsMask);
+ double z = a + 1.0;
+ z = z * z;
+ z = z * z;
+ return (asuint64 (z) >> 52) - 1023; /* Unbiased exponent of (|x|+1)^4. */
+}
+
+/* Approximation of erfc for |x| < 6.0 (sign is non-zero for negative x). */
+static inline double
+approx_erfc_lo (double x, uint32_t sign, int i)
+{
+ double a = fabs (x);
+ double z = a - xint[i];
+ double p = eval_poly_horner (z, i);
+ double e_mx2 = eval_accurate_gaussian (a);
+ if (sign)
+ return fma (-p, e_mx2, 2.0); /* erfc(-a) = 2 - erfc(a). */
+ else
+ return p * e_mx2;
+}
+
+/* Biased exponent of a double (top 12 bits with the sign bit cleared). */
+static inline uint32_t
+abstop12 (double x)
+{
+ return (asuint64 (x) >> 52) & 0x7ff;
+}
+
+/* Top 32 bits of a double (sign, exponent and high 20 mantissa bits). */
+static inline uint32_t
+top32 (double x)
+{
+ return asuint64 (x) >> 32;
+}
+
+/* Fast erfc implementation.
+ The approximation uses polynomial approximation of
+ exp(x^2) * erfc(x) with fixed orders on 20 intervals.
+ Maximum measured error is 4.05 ULPs:
+ erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2
+ want 0x1.ff84036f8f0b7p-2. */
+double
+erfc (double x)
+{
+ /* Get top words. */
+ uint32_t ix = top32 (x); /* We need to compare at most 32 bits. */
+ uint32_t ia = ix & 0x7fffffff;
+ uint32_t sign = ix >> 31;
+
+ /* Handle special cases and small values with a single comparison:
+ abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
+ Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2
+ Errno EDOM does not have to be set in case of erfc(nan).
+ Only ERANGE may be set in case of underflow.
+ Small values (|x|<small)
+ |x|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-56) = 0x3c7)
+ |x|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd). */
+ if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd))
+ {
+ if (abstop12 (x) >= 0x7ff)
+ return (double) (sign << 1) + 1.0 / x; /* special cases. */
+ else
+ return 1.0 - x; /* small case. */
+ }
+ else if (ia < 0x40180000)
+ { /* |x| < 6.0. */
+ return approx_erfc_lo (x, sign, get_itv_idx (x));
+ }
+ else if (sign)
+ { /* x <= -6.0. */
+ return 2.0;
+ }
+ else if (ia < 0x403c0000)
+ { /* 6.0 <= x < 28. */
+ return approx_erfc_hi (x, get_itv_idx (x));
+ }
+ else
+ { /* x >= 28. */
+ return __math_uflow (0);
+ }
+}
+
+PL_SIG (S, D, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (erfc, 3.56)
+PL_TEST_INTERVAL (erfc, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (erfc, 0x1p-1022, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erfc, -0x1p-1022, -0x1p-26, 40000)
+PL_TEST_INTERVAL (erfc, 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (erfc, -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (erfc, 0, inf, 40000)
diff --git a/pl/math/erfc_data.c b/pl/math/erfc_data.c
new file mode 100644
index 0000000..fa7184f
--- /dev/null
+++ b/pl/math/erfc_data.c
@@ -0,0 +1,145 @@
+/*
+ * Data used in double-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients for approximating erfc(x)*exp(x*x) in double
+ precision. Generated using the Remez algorithm on each interval separately
+ (see erfc.sollya for more detail). */
+const struct erfc_data __erfc_data = {
+
+/* Bounds for 20 intervals spanning [0x1.0p-50, 31.]. Interval bounds are a
+ logarithmic scale, i.e. interval n has lower bound 2^(n/4) - 1, with the
+ exception of the first interval. */
+.interval_bounds = {
+ 0x1.0p-50, /* Tiny boundary. */
+ 0x1.837f05c490126p-3, /* 0.189. */
+ 0x1.a827997709f7ap-2, /* 0.414. */
+ 0x1.5d13f326fe9c8p-1, /* 0.682. */
+ 0x1.0p0, /* 1.000. */
+ 0x1.60dfc14636e2ap0, /* 1.378. */
+ 0x1.d413cccfe779ap0, /* 1.828. */
+ 0x1.2e89f995ad3adp1, /* 2.364. */
+ 0x1.8p1, /* 3.000. */
+ 0x1.e0dfc14636e2ap1, /* 3.757. */
+ 0x1.2a09e667f3bcdp2, /* 4.657. */
+ 0x1.6e89f995ad3adp2, /* 5.727. */
+ 0x1.cp2, /* 7.000. */
+ 0x1.106fe0a31b715p3, /* 8.514. */
+ 0x1.4a09e667f3bcdp3, /* 10.31. */
+ 0x1.8e89f995ad3adp3, /* 12.45. */
+ 0x1.ep3, /* 15.00. */
+ 0x1.206fe0a31b715p4, /* 18.03. */
+ 0x1.5a09e667f3bcdp4, /* 21.63. */
+ 0x1.9e89f995ad3adp4, /* 25.91. */
+ 0x1.fp4 /* 31.00. */
+},
+
+/* Coefficients for each order 12 polynomial on each of the 20 intervals. */
+.poly = {
+ {0x1.ffffffffffff6p-1, -0x1.20dd750429b66p0, 0x1.fffffffffffdcp-1,
+ -0x1.812746b03713ap-1, 0x1.ffffffffbe94cp-2, -0x1.341f6bb6ec9a6p-2,
+ 0x1.555553a70ec2ep-3, -0x1.6023b4617a388p-4, 0x1.5550f0e40bfbap-5,
+ -0x1.38c290c0c8de8p-6, 0x1.0e84002c6274ep-7, -0x1.a599eb0ac5d04p-9,
+ 0x1.c9bfafa73899cp-11},
+ {0x1.a2b43dbd503c8p-1, -0x1.a3495b7c9e6a4p-1, 0x1.535f3fb8cb92ap-1,
+ -0x1.d96ee9c714f44p-2, 0x1.26956676d2c64p-2, -0x1.4e2820da90c08p-3,
+ 0x1.5ea0cffac775ap-4, -0x1.57fb82ca373e8p-5, 0x1.3e0e8f48ba0f8p-6,
+ -0x1.16a695af1bbd4p-7, 0x1.cc836241a87d4p-9, -0x1.531de41264fdap-10,
+ 0x1.526a8a14e9bfcp-12},
+ {0x1.532e75821ed48p-1, -0x1.28be350460782p-1, 0x1.b08873adbf108p-2,
+ -0x1.14377569249e2p-2, 0x1.3e1ece8cd10dap-3, -0x1.5087e2e6dc2e8p-4,
+ 0x1.4b3adb3bb335ap-5, -0x1.32342d711a4f4p-6, 0x1.0bc4f6ce2b656p-7,
+ -0x1.bcdaa331f2144p-9, 0x1.5c21c9e0ca954p-10, -0x1.dfdc9b3b5c402p-12,
+ 0x1.b451af7dd52fep-14},
+ {0x1.10f9745a4f44ap-1, -0x1.9b03213e6963ap-2, 0x1.09b942bc8de66p-2,
+ -0x1.32755394481e4p-3, 0x1.42819b18af0e4p-4, -0x1.3a6d643aaa572p-5,
+ 0x1.1f17897603eaep-6, -0x1.eefb8d3f89d42p-8, 0x1.95559544f2fbp-9,
+ -0x1.3c2a67c33338p-10, 0x1.cffa784efe6cp-12, -0x1.282646774689cp-13,
+ 0x1.e654e67532b44p-16},
+ {0x1.b5d8780f956b2p-2, -0x1.17c4e3f17c04dp-2, 0x1.3c27283c328dbp-3,
+ -0x1.44837f88ea4bdp-4, 0x1.33cad0e887482p-5, -0x1.10fcf0bc8963cp-6,
+ 0x1.c8cb68153ec42p-8, -0x1.6aef9a9842c54p-9, 0x1.1334345d6467cp-10,
+ -0x1.8ebe8763a2a8cp-12, 0x1.0f457219dec0dp-13, -0x1.3d2501dcd2a0fp-15,
+ 0x1.d213a128a75c9p-18},
+ {0x1.5ee444130b7dbp-2, -0x1.78396ab208478p-3, 0x1.6e617ec5c0cc3p-4,
+ -0x1.49e60f63656b5p-5, 0x1.16064fddbbcb9p-6, -0x1.ba80af6a31018p-8,
+ 0x1.4ec374269d4ecp-9, -0x1.e40be960703a4p-11, 0x1.4fb029f35a144p-12,
+ -0x1.be45fd71a60eap-14, 0x1.161235cd2a3e7p-15, -0x1.264890eb1b5ebp-17,
+ 0x1.7f90154bde15dp-20},
+ {0x1.19a22c064d4eap-2, -0x1.f645498cae217p-4, 0x1.a0565950e3f08p-5,
+ -0x1.446605c21c178p-6, 0x1.df1231d75622fp-8, -0x1.515167553de25p-9,
+ 0x1.c72c1b4a2a57fp-11, -0x1.276ae9394ecf1p-12, 0x1.71d2696d6c8c3p-14,
+ -0x1.bd4152984ce1dp-16, 0x1.f5afd2b450df7p-18, -0x1.dafdaddc7f943p-20,
+ 0x1.1020f4741f79ep-22},
+ {0x1.c57f0542a7637p-3, -0x1.4e5535c17afc8p-4, 0x1.d312725242824p-6,
+ -0x1.3727cbc12a4bbp-7, 0x1.8d6730fc45b6bp-9, -0x1.e8855055c9b53p-11,
+ 0x1.21f73b70cc792p-12, -0x1.4d4fe06f13831p-14, 0x1.73867a82f7484p-16,
+ -0x1.8fab204d1d75ep-18, 0x1.91d9ba10367f4p-20, -0x1.5077ce4b334ddp-22,
+ 0x1.501716d098f14p-25},
+ {0x1.6e9827d229d2dp-3, -0x1.bd6ae4d14b135p-5, 0x1.043fe1a989f11p-6,
+ -0x1.259061b98cf96p-8, 0x1.409cc2b1c4fc2p-10, -0x1.53dec152f6abfp-12,
+ 0x1.5e72cb4cc919fp-14, -0x1.6018b68100642p-16, 0x1.58d859380fb24p-18,
+ -0x1.471723286dad5p-20, 0x1.21c1a0f7a6593p-22, -0x1.a872678d91154p-25,
+ 0x1.6eb74e2e99662p-28},
+ {0x1.29a8a4e95063ep-3, -0x1.29a8a316d3318p-5, 0x1.21876b3fe4f84p-7,
+ -0x1.1276f2d8ee36cp-9, 0x1.fbff52181a454p-12, -0x1.cb9ce9bde195ep-14,
+ 0x1.9710786fa90c5p-16, -0x1.6145ad5b471dcp-18, 0x1.2c52fac57009cp-20,
+ -0x1.f02a8711f07cfp-23, 0x1.7eb574960398cp-25, -0x1.e58ce325343aap-28,
+ 0x1.68510d1c32842p-31},
+ {0x1.e583024e2bc8p-4, -0x1.8fb458acb5b0fp-6, 0x1.42b9dffac2531p-8,
+ -0x1.ff9fe9a553dddp-11, 0x1.8e7e86883ba0bp-13, -0x1.313af0bb12375p-15,
+ 0x1.cc29ccb17372ep-18, -0x1.55895fbb1ae42p-20, 0x1.f2bd2d6c7fd07p-23,
+ -0x1.62ec031844613p-25, 0x1.d7d69ce7c1847p-28, -0x1.0106b95e4db03p-30,
+ 0x1.45aabbe505f6ap-34},
+ {0x1.8d9cbafa30408p-4, -0x1.0dd14614ed20fp-6, 0x1.6943976ea9dcap-9,
+ -0x1.dd6f05f4d7ce8p-12, 0x1.37891334aa621p-14, -0x1.91a8207766e1ep-17,
+ 0x1.ffcb0c613d75cp-20, -0x1.425116a6c88dfp-22, 0x1.90cb7c902d428p-25,
+ -0x1.e70fc740c3b6dp-28, 0x1.14a09ae5851ep-30, -0x1.00f9e03eae993p-33,
+ 0x1.14989aac741c2p-37},
+ {0x1.46dc6bf900f68p-4, -0x1.6e4b45246f8dp-7, 0x1.96a3de47cfdb5p-10,
+ -0x1.bf5070eb6823bp-13, 0x1.e7af6e4aa8ef8p-16, -0x1.078bf26142831p-18,
+ 0x1.1a6e547aa40bep-21, -0x1.2c1c68f62f614p-24, 0x1.3bb8b473dd9e7p-27,
+ -0x1.45576cacb45a1p-30, 0x1.39ab71899b44ep-33, -0x1.ee307d46e2866p-37,
+ 0x1.c21ba1b404f5ap-41},
+ {0x1.0d9a17e032288p-4, -0x1.f3e942ff4e097p-8, 0x1.cc77f09db5af8p-11,
+ -0x1.a56e8bffaab5cp-14, 0x1.7f49e36974e03p-17, -0x1.5a73fc0025d2fp-20,
+ 0x1.3742ae06a8be6p-23, -0x1.15ecf5317789bp-26, 0x1.ec74dd2b109fp-30,
+ -0x1.ac28325f88dc1p-33, 0x1.5ca9e8d7841b2p-36, -0x1.cfef04667185fp-40,
+ 0x1.6487c50052867p-44},
+ {0x1.be0c73cc19eddp-5, -0x1.56ce6f6c0cb33p-8, 0x1.0645980ec8568p-11,
+ -0x1.8f86f88695a8cp-15, 0x1.2ef80cb1dca7cp-18, -0x1.c97ff7c599a6dp-22,
+ 0x1.57f0ac907d436p-25, -0x1.016be8d812c69p-28, 0x1.7ef6d33c73b75p-32,
+ -0x1.17f9784eda0d4p-35, 0x1.7fd8662b486f1p-39, -0x1.ae21758156d89p-43,
+ 0x1.165732f1ae138p-47},
+ {0x1.71eafbd9f5877p-5, -0x1.d83714d904525p-9, 0x1.2c74dbaccea28p-12,
+ -0x1.7d27f3cdea565p-16, 0x1.e20b13581fcf8p-20, -0x1.2fe336f089679p-23,
+ 0x1.7dfce36129db3p-27, -0x1.dea026ee03f14p-31, 0x1.2a6019f7c64b1p-34,
+ -0x1.6e0eeb9f98eeap-38, 0x1.a58b4ed07d741p-42, -0x1.8d12c77071e4cp-46,
+ 0x1.b0241c6d5b761p-51},
+ {0x1.33714a024097ep-5, -0x1.467f441a50cbdp-9, 0x1.59fa2994d0e65p-13,
+ -0x1.6dd369d9306cap-17, 0x1.81fb2b2af9413p-21, -0x1.96604d3c1bb6ep-25,
+ 0x1.aaef2da14243p-29, -0x1.bf7f1b935d3ebp-33, 0x1.d3261ebcd2061p-37,
+ -0x1.e04c803bbd875p-41, 0x1.cff98a43bacdep-45, -0x1.6ef39a63cf675p-49,
+ 0x1.4f8abb4398a0dp-54},
+ {0x1.fff97acd75487p-6, -0x1.c502e8e46ec0cp-10, 0x1.903b0650672eap-14,
+ -0x1.6110aa5fb096fp-18, 0x1.36fd4c3e4040cp-22, -0x1.118489fe28728p-26,
+ 0x1.e06601208ac47p-31, -0x1.a52b90c21650ap-35, 0x1.6ffc42c05429bp-39,
+ -0x1.3ce3322a6972ep-43, 0x1.009d8ef37ff8cp-47, -0x1.5498d2cc51c99p-52,
+ 0x1.058cd4ea9bf04p-57},
+ {0x1.aaf347fc8c45bp-6, -0x1.3b2fd709cf97dp-10, 0x1.d0ddfb8593f4p-15,
+ -0x1.5673f4aa86542p-19, 0x1.f8048954325f6p-24, -0x1.72839959ab3e9p-28,
+ 0x1.101597113be2ap-32, -0x1.8f1cf0ff4adeep-37, 0x1.23dca407fd66p-41,
+ -0x1.a4f387e57a6a5p-46, 0x1.1dafd753f65e9p-50, -0x1.3e15343c973d6p-55,
+ 0x1.9a2af47d77e44p-61},
+ {0x1.64839d636f92bp-6, -0x1.b7adf7536232dp-11, 0x1.0eec0b6357148p-15,
+ -0x1.4da09b7f2c52bp-20, 0x1.9a8b146de838ep-25, -0x1.f8d1f145e7b6fp-30,
+ 0x1.3624435b3ba11p-34, -0x1.7cba19b4af977p-39, 0x1.d2282481ba91ep-44,
+ -0x1.198c1e91f9564p-48, 0x1.4046224f8ccp-53, -0x1.2b1dc676c096fp-58,
+ 0x1.43d3358c64dafp-64}
+}
+};
diff --git a/pl/math/erfcf.h b/pl/math/erfcf.h
new file mode 100644
index 0000000..8f1e5f4
--- /dev/null
+++ b/pl/math/erfcf.h
@@ -0,0 +1,38 @@
+/*
+ * Shared functions for scalar and vector single-precision erfc(x) functions.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_ERFCF_H
+#define PL_MATH_ERFCF_H
+
+#include "math_config.h"
+
+#define FMA fma
+#include "estrin_wrap.h"
+
+/* Accurate exponential from optimized-routines. */
+double
+__exp_dd (double x, double xtail);
+
+static inline double
+eval_poly (double z, const double *coeff)
+{
+ double z2 = z * z;
+ double z4 = z2 * z2;
+ double z8 = z4 * z4;
+#define C(i) coeff[i]
+ return ESTRIN_15 (z, z2, z4, z8, C); /* Uses coeff[0] .. coeff[15]. */
+#undef C
+}
+
+static inline double
+eval_exp_mx2 (double x)
+{
+ return __exp_dd (-(x * x), 0.0); /* exp(-x^2) with a zero tail term. */
+}
+
+#undef FMA
+#endif // PL_MATH_ERFCF_H
diff --git a/pl/math/erfcf_2u.c b/pl/math/erfcf_2u.c
new file mode 100644
index 0000000..5a3f9b0
--- /dev/null
+++ b/pl/math/erfcf_2u.c
@@ -0,0 +1,133 @@
+/*
+ * Single-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "erfcf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define P(i) __erfcf_poly_data.poly[i]
+
+/* Approximation of erfcf for 4.0 <= |x| < 10.0; returns 2 for negative x. */
+static inline float
+approx_erfcf_hi (float x, uint32_t sign, const double *coeff)
+{
+ if (sign)
+ {
+ return 2.0f;
+ }
+
+ /* Polynomial contribution. */
+ double z = (double) fabs (x);
+ float p = (float) eval_poly (z, coeff);
+ /* Gaussian contribution. */
+ float e_mx2 = (float) eval_exp_mx2 (z);
+
+ return p * e_mx2;
+}
+
+/* Approximation of erfcf for |x| < 4.0 (sign is non-zero for negative x). */
+static inline float
+approx_erfcf_lo (float x, uint32_t sign, const double *coeff)
+{
+ /* Polynomial contribution. */
+ double z = (double) fabs (x);
+ float p = (float) eval_poly (z, coeff);
+ /* Gaussian contribution. */
+ float e_mx2 = (float) eval_exp_mx2 (z);
+
+ if (sign)
+ return fmaf (-p, e_mx2, 2.0f); /* erfc(-a) = 2 - erfc(a). */
+ else
+ return p * e_mx2;
+}
+
+/* Top 12 bits of a float with the sign bit cleared. */
+static inline uint32_t
+abstop12 (float x)
+{
+ return (asuint (x) >> 20) & 0x7ff;
+}
+
+/* Top 12 bits of a float (sign, exponent and high 3 mantissa bits). */
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20;
+}
+
+/* Fast erfcf approximation using polynomial approximation
+ multiplied by gaussian.
+ Most of the computation is carried out in double precision,
+ and is very sensitive to accuracy of polynomial and exp
+ evaluation.
+ Worst-case error is 1.968ulps, obtained for x = 2.0412941.
+ erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp
+ err 1.46788. */
+float
+erfcf (float x)
+{
+ /* Get top words and sign. */
+ uint32_t ix = asuint (x); /* We need to compare at most 32 bits. */
+ uint32_t sign = ix >> 31;
+ uint32_t ia12 = top12 (x) & 0x7ff;
+
+ /* Handle special cases and small values with a single comparison:
+ abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
+
+ Special cases
+ erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2
+
+ Errno
+ EDOM does not have to be set in case of erfcf(nan).
+ Only ERANGE may be set in case of underflow.
+
+ Small values (|x|<small)
+ |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */
+ if (unlikely (abstop12 (x) - 0x328 >= (abstop12 (INFINITY) & 0x7f8) - 0x328))
+ {
+ if (abstop12 (x) >= 0x7f8)
+ return (float) (sign << 1) + 1.0f / x; /* Special cases. */
+ else
+ return 1.0f - x; /* Small case. */
+ }
+
+ /* Normalized numbers divided in 4 intervals
+ with bounds: 2.0, 4.0, 8.0 and 10.0. 10 was chosen as the upper bound for
+ the interesting region as it is the smallest value, representable as a
+ 12-bit integer, for which returning 0 gives <1.5 ULP. */
+ if (ia12 < 0x400)
+ {
+ return approx_erfcf_lo (x, sign, P (0));
+ }
+ if (ia12 < 0x408)
+ {
+ return approx_erfcf_lo (x, sign, P (1));
+ }
+ if (ia12 < 0x410)
+ {
+ return approx_erfcf_hi (x, sign, P (2));
+ }
+ if (ia12 < 0x412)
+ {
+ return approx_erfcf_hi (x, sign, P (3));
+ }
+ if (sign)
+ {
+ return 2.0f; /* x <= -10.0. */
+ }
+ return __math_uflowf (0); /* x >= 10.0. */
+}
+
+PL_SIG (S, F, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (erfcf, 1.5)
+PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (erfcf, 0, inf, 40000)
diff --git a/pl/math/erfcf_data.c b/pl/math/erfcf_data.c
new file mode 100644
index 0000000..2e018c8
--- /dev/null
+++ b/pl/math/erfcf_data.c
@@ -0,0 +1,57 @@
+/*
+ * Data used in single-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients for approximating erfc(x)*exp(x*x) in double
+ precision. Generated using the Remez algorithm on each interval separately
+ (see erfcf.sollya for more detail). */
+const struct erfcf_poly_data __erfcf_poly_data
+ = {.poly
+ = {{
+#if ERFCF_POLY_NCOEFFS == 16
+ 0x1.ffffffffe7c59p-1, -0x1.20dd74f8cecc5p0, 0x1.fffffc67a0fbdp-1,
+ -0x1.81270c3ced2d6p-1, 0x1.fffc0c6606e45p-2, -0x1.340a779e8a8e3p-2,
+ 0x1.54c1663fc5a01p-3, -0x1.5d468c9269dafp-4, 0x1.4afe6b00df9d5p-5,
+ -0x1.1d22d2720cb91p-6, 0x1.afa399a5761b1p-8, -0x1.113851b5858adp-9,
+ 0x1.0f992e4d5c6a4p-11, -0x1.86534d558052ap-14, 0x1.63e537bfb7cd5p-17,
+ -0x1.32712a6275c4dp-21
+#endif
+ },
+
+ {
+#if ERFCF_POLY_NCOEFFS == 16
+ 0x1.fea5663f75cd1p-1, -0x1.1cb5a82adf1c4p0, 0x1.e7c8da942d86fp-1,
+ -0x1.547ba0456bac7p-1, 0x1.8a6fc0f4421a4p-2, -0x1.7c14f9301ee58p-3,
+ 0x1.2f67c8351577p-4, -0x1.8e733f6d159d9p-6, 0x1.aa6a0ec249067p-8,
+ -0x1.6f4ec45b11f3fp-10, 0x1.f4c00c4b33ba8p-13, -0x1.0795faf7846d2p-15,
+ 0x1.9cef9031810ddp-19, -0x1.c4d60c3fecdb6p-23, 0x1.360547ec2229dp-27,
+ -0x1.8ec1581647f9fp-33
+#endif
+ },
+
+ {
+#if ERFCF_POLY_NCOEFFS == 16
+ 0x1.dae421147c591p-1, -0x1.c211957a0abfcp-1, 0x1.28a8d87aa1b12p-1,
+ -0x1.224d2a58cbef4p-2, 0x1.b3d45dcaef898p-4, -0x1.ff99d8b33e7a9p-6,
+ 0x1.dac66375b99f6p-8, -0x1.5e1786f0f91ap-10, 0x1.9a2588deaec4fp-13,
+ -0x1.7b886b183b235p-16, 0x1.1209e7da8ff82p-19, -0x1.2e5c870c6ed8p-23,
+ 0x1.ec6a89422928ep-28, -0x1.16e7d837b61bcp-32, 0x1.88868a73e4b43p-38,
+ -0x1.027034672f11cp-44
+#endif
+ },
+
+ {
+#if ERFCF_POLY_NCOEFFS == 16
+ 0x1.8ae320c1bad5ap-1, -0x1.1cdd6aa6929aap-1, 0x1.0e39a7b285f58p-2,
+ -0x1.6fb12a95e351dp-4, 0x1.77dd0649e352cp-6, -0x1.28a9e9560c461p-8,
+ 0x1.6f7d7778e9433p-11, -0x1.68363698afe4ap-14, 0x1.17e94cdf35d82p-17,
+ -0x1.5766a817bd3ffp-21, 0x1.48d892094a2c1p-25, -0x1.e1b6511ab6d0bp-30,
+ 0x1.04c7b8143f6a4p-34, -0x1.898831961065bp-40, 0x1.71ae8a56142a6p-46,
+ -0x1.45abac612344bp-53
+#endif
+ }}};
diff --git a/pl/math/erff_1u5.c b/pl/math/erff_1u5.c
new file mode 100644
index 0000000..1a69872
--- /dev/null
+++ b/pl/math/erff_1u5.c
@@ -0,0 +1,108 @@
+/*
+ * Single-precision erf(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "estrinf.h"
+#include "hornerf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
+#define A __erff_data.erff_poly_A
+#define B __erff_data.erff_poly_B
+
+/* Top 12 bits of a float (sign, exponent and high 3 mantissa bits). */
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20;
+}
+
+/* Efficient implementation of erff using either a pure polynomial approximation
+ or the exponential of a polynomial. Worst-case error is 1.09ulps at
+ 0x1.c111acp-1. */
+float
+erff (float x)
+{
+ float r, x2;
+
+ /* Get top word. */
+ uint32_t ix = asuint (x);
+ uint32_t sign = ix >> 31;
+ uint32_t ia12 = top12 (x) & 0x7ff;
+
+ /* Limit of both intervals is 0.875 for performance reasons but coefficients
+ computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
+ from 0.94 to 1.1ulps. */
+ if (ia12 < 0x3f6)
+ { /* a = |x| < 0.875. */
+
+ /* Tiny and subnormal cases. */
+ if (unlikely (ia12 < 0x318))
+ { /* |x| < 2^(-28). */
+ if (unlikely (ia12 < 0x040))
+ { /* |x| < 2^(-119). */
+ float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
+ return check_uflowf (y);
+ }
+ return x + TwoOverSqrtPiMinusOne * x;
+ }
+
+ x2 = x * x;
+
+ /* Normalized cases (|x| < 0.875, P fitted on [0, 0.921875]) - Use Horner
+ scheme for x+x*P(x^2). */
+#define C(i) A[i]
+ r = fmaf (HORNER_5 (x2, C), x, x);
+#undef C
+ }
+ else if (ia12 < 0x408)
+ { /* |x| < 4.0 - Use a custom Estrin scheme. */
+
+ float a = fabsf (x);
+ /* Use Estrin scheme on high order (small magnitude) coefficients. */
+#define C(i) B[i]
+ r = ESTRIN_3_ (a, x * x, C, 3);
+#undef C
+ /* Then switch to pure Horner scheme. */
+ r = fmaf (r, a, B[2]);
+ r = fmaf (r, a, B[1]);
+ r = fmaf (r, a, B[0]);
+ r = fmaf (r, a, a);
+ /* Single precision exponential with ~0.5ulps ensures erff has maximum
+ relative error below 1ulp on [0.921875, 4.0] and below 1.1ulps on
+ [0.875, 4.0]. */
+ r = expf (-r);
+ /* Explicit copysign (calling copysignf increases latency). */
+ if (sign)
+ r = -1.0f + r;
+ else
+ r = 1.0f - r;
+ }
+ else
+ { /* |x| >= 4.0. */
+
+ /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
+ if (unlikely (ia12 >= 0x7f8))
+ return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
+
+ /* Explicit copysign (calling copysignf increases latency). */
+ if (sign)
+ r = -1.0f;
+ else
+ r = 1.0f;
+ }
+ return r;
+}
+
+PL_SIG (S, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (erff, 0.6)
+PL_TEST_INTERVAL (erff, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (erff, 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erff, -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (erff, 0x1p-26, 0x1p3, 40000)
+PL_TEST_INTERVAL (erff, -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (erff, 0, inf, 40000)
diff --git a/pl/math/erff_data.c b/pl/math/erff_data.c
new file mode 100644
index 0000000..2352bae
--- /dev/null
+++ b/pl/math/erff_data.c
@@ -0,0 +1,16 @@
+/*
+ * Data for approximation of erff.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Minimax coefficients for erff: poly_A for |x| < 0.875, poly_B up to 4.0. */
+const struct erff_data __erff_data
+ = {.erff_poly_A = {0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
+ -0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f},
+ .erff_poly_B
+ = {0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f, -0x1.8d6300p-6f,
+ 0x1.fd1336p-9f, -0x1.91d2ccp-12f, 0x1.222900p-16f}};
diff --git a/pl/math/estrin.h b/pl/math/estrin.h
new file mode 100644
index 0000000..f967fb0
--- /dev/null
+++ b/pl/math/estrin.h
@@ -0,0 +1,16 @@
+/*
+ * Helper macros for double-precision Estrin polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if V_SUPPORTED
+#define FMA v_fma_f64
+#else
+#define FMA fma
+#endif
+
+#include "estrin_wrap.h"
diff --git a/pl/math/estrin_wrap.h b/pl/math/estrin_wrap.h
new file mode 100644
index 0000000..2ae0700
--- /dev/null
+++ b/pl/math/estrin_wrap.h
@@ -0,0 +1,48 @@
+/*
+ * Helper macros for Estrin polynomial evaluation; includer must define FMA.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+// clang-format off
+#define ESTRIN_1_(x, c, i) FMA(x, c(1 + i), c(i))
+#define ESTRIN_2_(x, x2, c, i) FMA(x2, c(2 + i), ESTRIN_1_(x, c, i))
+#define ESTRIN_3_(x, x2, c, i) FMA(x2, ESTRIN_1_(x, c, 2 + i), ESTRIN_1_(x, c, i))
+#define ESTRIN_4_(x, x2, x4, c, i) FMA(x4, c(4 + i), ESTRIN_3_(x, x2, c, i))
+#define ESTRIN_5_(x, x2, x4, c, i) FMA(x4, ESTRIN_1_(x, c, 4 + i), ESTRIN_3_(x, x2, c, i))
+#define ESTRIN_6_(x, x2, x4, c, i) FMA(x4, ESTRIN_2_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
+#define ESTRIN_7_(x, x2, x4, c, i) FMA(x4, ESTRIN_3_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
+#define ESTRIN_8_(x, x2, x4, x8, c, i) FMA(x8, c(8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_9_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_1_(x, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_10_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_2_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_11_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_3_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_12_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_4_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_13_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_5_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_14_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_6_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_15_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_7_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
+#define ESTRIN_16_(x, x2, x4, x8, x16, c, i) FMA(x16, c(16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
+#define ESTRIN_17_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_1_(x, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
+#define ESTRIN_18_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_2_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
+#define ESTRIN_19_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_3_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
+
+#define ESTRIN_1(x, c) ESTRIN_1_(x, c, 0)
+#define ESTRIN_2(x, x2, c) ESTRIN_2_(x, x2, c, 0)
+#define ESTRIN_3(x, x2, c) ESTRIN_3_(x, x2, c, 0)
+#define ESTRIN_4(x, x2, x4, c) ESTRIN_4_(x, x2, x4, c, 0)
+#define ESTRIN_5(x, x2, x4, c) ESTRIN_5_(x, x2, x4, c, 0)
+#define ESTRIN_6(x, x2, x4, c) ESTRIN_6_(x, x2, x4, c, 0)
+#define ESTRIN_7(x, x2, x4, c) ESTRIN_7_(x, x2, x4, c, 0)
+#define ESTRIN_8(x, x2, x4, x8, c) ESTRIN_8_(x, x2, x4, x8, c, 0)
+#define ESTRIN_9(x, x2, x4, x8, c) ESTRIN_9_(x, x2, x4, x8, c, 0)
+#define ESTRIN_10(x, x2, x4, x8, c) ESTRIN_10_(x, x2, x4, x8, c, 0)
+#define ESTRIN_11(x, x2, x4, x8, c) ESTRIN_11_(x, x2, x4, x8, c, 0)
+#define ESTRIN_12(x, x2, x4, x8, c) ESTRIN_12_(x, x2, x4, x8, c, 0)
+#define ESTRIN_13(x, x2, x4, x8, c) ESTRIN_13_(x, x2, x4, x8, c, 0)
+#define ESTRIN_14(x, x2, x4, x8, c) ESTRIN_14_(x, x2, x4, x8, c, 0)
+#define ESTRIN_15(x, x2, x4, x8, c) ESTRIN_15_(x, x2, x4, x8, c, 0)
+#define ESTRIN_16(x, x2, x4, x8, x16, c) ESTRIN_16_(x, x2, x4, x8, x16, c, 0)
+#define ESTRIN_17(x, x2, x4, x8, x16, c) ESTRIN_17_(x, x2, x4, x8, x16, c, 0)
+#define ESTRIN_18(x, x2, x4, x8, x16, c) ESTRIN_18_(x, x2, x4, x8, x16, c, 0)
+#define ESTRIN_19(x, x2, x4, x8, x16, c) ESTRIN_19_(x, x2, x4, x8, x16, c, 0)
+// clang-format on
diff --git a/pl/math/estrinf.h b/pl/math/estrinf.h
new file mode 100644
index 0000000..175233c
--- /dev/null
+++ b/pl/math/estrinf.h
@@ -0,0 +1,14 @@
+/*
+ * Helper macros for single-precision Estrin polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if V_SUPPORTED
+#define FMA v_fma_f32
+#else
+#define FMA fmaf
+#endif
+
+#include "estrin_wrap.h"
diff --git a/pl/math/exp.c b/pl/math/exp.c
new file mode 100644
index 0000000..90253b6
--- /dev/null
+++ b/pl/math/exp.c
@@ -0,0 +1,163 @@
+/*
+ * Double-precision e^x function.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS) /* Table granularity: 2^(k/N). */
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab /* Pairs: T[2*i] tail bits, T[2*i+1] scale bits. */
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER] /* Ck is poly[k + 3 - ORDER]. */
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS; its computed exponent may have overflowed into the sign
+ bit, so it is rebiased (by 2^-1009 or 2^1022) before being used as a double
+ and the result is scaled back at the end. (int32_t)KI is the k used in the
+ argument reduction and exponent adjustment of scale: bit 31 clear (k > 0)
+ means the result may overflow, bit 31 set (k < 0) means it may underflow. */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
+
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble (sbits);
+ y = 0x1p1009 * (scale + scale * tmp);
+ return check_oflow (eval_as_double (y));
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ scale = asdouble (sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0)
+ {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp; /* ~ what was lost when rounding y. */
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo;
+ y = eval_as_double (hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0;
+ /* The underflow exception needs to be signaled explicitly. */
+ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+ }
+ y = 0x1p-1022 * y; /* Undo the 2^1022 rebias applied to sbits above. */
+ return check_uflow (eval_as_double (y));
+}
+
+/* Sign and exponent fields of X: its representation shifted right by 52. */
+static inline uint32_t
+top12 (double x)
+{
+ return (uint32_t) (asuint64 (x) >> 52);
+}
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ If hastail is 0 then xtail is ignored (it is assumed to be 0). */
+static inline double
+exp_inline (double x, double xtail, int hastail)
+{
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, z, r, r2, scale, tail, tmp;
+
+ abstop = top12 (x) & 0x7ff; /* Exponent bits only, sign dropped. */
+ if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+ {
+ if (abstop - top12 (0x1p-54) >= 0x80000000)
+ /* Unsigned wraparound, i.e. |x| < 2^-54: avoid spurious underflow. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12 (1024.0))
+ {
+ if (asuint64 (x) == asuint64 (-INFINITY))
+ return 0.0;
+ if (abstop >= top12 (INFINITY))
+ return 1.0 + x;
+ if (asuint64 (x) >> 63)
+ return __math_uflow (0);
+ else
+ return __math_oflow (0);
+ }
+ /* Large x (512 <= |x| < 1024) is special cased below via abstop == 0. */
+ abstop = 0;
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ z = InvLn2N * x;
+#if TOINT_INTRINSICS
+ kd = roundtoint (z);
+ ki = converttoint (z);
+#elif EXP_USE_TOINT_NARROW
+ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
+ kd = eval_as_double (z + Shift);
+ ki = asuint64 (kd) >> 16;
+ kd = (double_t) (int32_t) ki;
+#else
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ kd = eval_as_double (z + Shift);
+ ki = asuint64 (kd);
+ kd -= Shift;
+#endif
+ r = x + kd * NegLn2hiN + kd * NegLn2loN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ if (hastail)
+ r += xtail;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N);
+ top = ki << (52 - EXP_TABLE_BITS);
+ tail = asdouble (T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.25/N ulp larger. */
+ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
+#if EXP_POLY_ORDER == 4
+ tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+ tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+ if (unlikely (abstop == 0))
+ return specialcase (tmp, sbits, ki);
+ scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double (scale + scale * tmp);
+}
+
+/* exp(x + xtail); may help implement pow, whose input exceeds double. */
+double
+__exp_dd (double x, double xtail)
+{
+ const int hastail = 1;
+ return exp_inline (x, xtail, hastail);
+}
+
diff --git a/pl/math/exp_data.c b/pl/math/exp_data.c
new file mode 100644
index 0000000..2354be7
--- /dev/null
+++ b/pl/math/exp_data.c
@@ -0,0 +1,1120 @@
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS) /* Number of (tab[2*k], tab[2*k+1]) pairs. */
+
+const struct exp_data __exp_data = {
+// N/ln2
+.invln2N = 0x1.71547652b82fep0 * N,
+// -ln2/N
+#if N == 64
+.negln2hiN = -0x1.62e42fefa0000p-7,
+.negln2loN = -0x1.cf79abc9e3b3ap-46,
+#elif N == 128
+.negln2hiN = -0x1.62e42fefa0000p-8,
+.negln2loN = -0x1.cf79abc9e3b3ap-47,
+#elif N == 256
+.negln2hiN = -0x1.62e42fefc0000p-9,
+.negln2loN = 0x1.c610ca86c3899p-45,
+#elif N == 512
+.negln2hiN = -0x1.62e42fef80000p-10,
+.negln2loN = -0x1.1cf79abc9e3b4p-45,
+#endif
+// Used for rounding when !TOINT_INTRINSICS
+#if EXP_USE_TOINT_NARROW
+.shift = 0x1800000000.8p0,
+#else
+.shift = 0x1.8p52,
+#endif
+// exp polynomial coefficients.
+.poly = {
+#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
+// abs error: 1.5543*2^-60
+// ulp error: 0.529 (0.533 without fma)
+// if |x| < ln2/128+eps
+// abs error if |x| < ln2/64: 1.7157*2^-50
+0x1.fffffffffdbcdp-2,
+0x1.555555555444cp-3,
+0x1.555573c6a9f7dp-5,
+0x1.1111266d28935p-7,
+#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6735*2^-64
+// ulp error: 0.518 (0.522 without fma)
+// if |x| < ln2/64
+0x1.5555555548f9ap-3,
+0x1.555555554bf5dp-5,
+0x1.11115b75f0f4dp-7,
+0x1.6c171a6b6303ep-10,
+#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
+// abs error: 1.555*2^-66
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/256+eps
+// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
+// abs error if |x| < ln2/128: 1.7145*2^-56
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf172b91p-5,
+0x1.1111167a4d017p-7,
+#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5542*2^-60
+// ulp error: 0.521 (0.523 without fma)
+// if |x| < ln2/128
+0x1.fffffffffdbcep-2,
+0x1.55555555543c2p-3,
+0x1.555573c64f2e3p-5,
+0x1.111126b4eff73p-7,
+#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6861*2^-71
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/128
+0x1.55555555548fdp-3,
+0x1.555555555658fp-5,
+0x1.111123a859bb6p-7,
+0x1.6c16ba6920cabp-10,
+#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.43*2^-58
+// ulp error: 0.549 (0.550 without fma)
+// if |x| < ln2/512
+0x1p0, // unused
+0x1.fffffffffffd4p-2,
+0x1.5555571d6ef9p-3,
+0x1.5555576a5adcep-5,
+#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5547*2^-66
+// ulp error: 0.505 (0.506 without fma)
+// if |x| < ln2/256
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf16e1edp-5,
+0x1.1111167a4b553p-7,
+#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.4300*2^-63
+// ulp error: 0.504
+// if |x| < ln2/1024
+// abs error if |x| < ln2/512: 1.0689*2^-55
+0x1p0, // unused
+0x1.ffffffffffffdp-2,
+0x1.555555c75bb6p-3,
+0x1.555555dec04a8p-5,
+#endif
+},
+.exp2_shift = 0x1.8p52 / N,
+// exp2 polynomial coefficients.
+.exp2_poly = {
+#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE
+// abs error: 1.3054*2^-63
+// ulp error: 0.515
+// if |x| < 1/64
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c58fp-3,
+0x1.c6b08d7045cf1p-5,
+0x1.3b2ab6fb8fd0ep-7,
+0x1.5d884afec48d7p-10,
+0x1.43097dc684ae1p-13,
+#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE
+// abs error: 1.2195*2^-65
+// ulp error: 0.507 (0.511 without fma)
+// if |x| < 1/256
+// abs error if |x| < 1/128: 1.9941*2^-56
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE
+// abs error: 1.2195*2^-65
+// ulp error: 0.504 (0.508 without fma)
+// if |x| < 1/256
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE
+// abs error: 1.4411*2^-64
+// ulp error: 0.5024 (0.5063 without fma)
+// if |x| < 1/1024
+// abs error if |x| < 1/512: 1.9430*2^-56
+0x1.62e42fefa39ecp-1,
+0x1.ebfbdff82c58bp-3,
+0x1.c6b08e46de41fp-5,
+0x1.3b2ab786ee1dap-7,
+#endif
+},
+// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+// tab[2*k] = asuint64(T[k])
+// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
+.tab = {
+#if N == 64
+0x0, 0x3ff0000000000000,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+#elif N == 128
+0x0, 0x3ff0000000000000,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+#elif N == 256
+0x0, 0x3ff0000000000000,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc760a3629969871, 0x3feea3878491c491,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+#elif N == 512
+0x0, 0x3ff0000000000000,
+0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0x3c75c18e5ae0563a, 0x3feff3d1e77170b4,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc705b1125cf49a5, 0x3fefef003103b10e,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c9f879abbff3f87, 0x3fefea363d42b027,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0x3c9b14003824712a, 0x3fefe57411915a8a,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c61cbf0f38af658, 0x3fefe0b9b35659d8,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0xbc9a3316383dcbc5, 0x3fefdc0727fc1762,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc89b3236d111646, 0x3fefd2b99fa6407c,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0xbc8cb191be99b1b0, 0x3fefce1ead925493,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0xbc9acb71e83765b7, 0x3fefc98ba42e7d30,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c5cd3e58b03697e, 0x3fefc50088f8093f,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0xbc8bfb07d4755452, 0x3fefc07d61701716,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0xbc85b9eb0402507b, 0x3fefb323d833d93f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c9c8be44bf4cde8, 0x3fefa612a7b26300,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0x3c820c5444c93c44, 0x3fefa1c7c55189c6,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc84c6baeb580d7a, 0x3fef9d8503328e6d,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8657aa1b0d9f83, 0x3fef994a66f951ce,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc6b0b2789925e90, 0x3fef90edb6db2dc1,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc93aad17d197fae, 0x3fef8ccbae51a5c8,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc989c464a07ad70, 0x3fef88b1e264a0e9,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc72c338fce197f4, 0x3fef84a058cbae1e,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc6dca724cea0eb6, 0x3fef809717425438,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c464770b955d34d, 0x3fef7c962388149e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc962811c114424f, 0x3fef789d83606e12,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0x3c8ec58e74904dd4, 0x3fef74ad3c92df73,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8bda920de0f6e2, 0x3fef690eba4df41f,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0xbc9a597f9a5ff71c, 0x3fef654013041dc2,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c50835b125aa573, 0x3fef6179e2363cf8,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0xbc6817fd6a313e3e, 0x3fef565a51860746,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc96236af85fd26a, 0x3fef52b6358e15e8,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0x3c7795eb4523abe7, 0x3fef4f1aad999e82,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8fe58b91b40095, 0x3fef4b87bf9cda38,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c91ecaa860c614a, 0x3fef47fd7190241e,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c87f1c7350e256d, 0x3fef3d9282fc1f27,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c420dac6c124f4f, 0x3fef3a2af0b63bff,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0xbc99501d09bc09fd, 0x3fef36cc1c78903a,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c73baf864dc8675, 0x3fef33760c547f15,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc7548165d85ed32, 0x3fef29a8b16f0a30,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc93a255f697ecfe, 0x3fef234c0ea83f36,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c8d2d19edc1e550, 0x3fef202b17779965,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc76b2173113dd8c, 0x3fef1d130f50d65c,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0x3c811aa5f853590b, 0x3fef1a03fc675d1f,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c61d61a34c8aa02, 0x3fef16fde4f2e280,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0xbc91f892bf6b286d, 0x3fef1400cf2f6c18,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c7590c65c20e680, 0x3fef110cc15d5346,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c7b3bf786a54a87, 0x3fef08670653dfe4,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c74bb6c41732885, 0x3fef05975721b004,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc5451d60c6ac9eb, 0x3fef001375752b40,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8787a210ceafd9, 0x3feefd5f4fb45e20,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc888d1e4629943d, 0x3feefab46484ebb4,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc93369c544088b6, 0x3feef812ba4ea77d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0x3c85373ce4eb6dfb, 0x3feef57a577dd72b,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0x3c83adec8265a67f, 0x3feef2eb428335b4,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc835388bcac6bc5, 0x3feef06581d3f669,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0x3c727cdb4e4b6640, 0x3feeede91be9c811,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0x3c86c2696a26af35, 0x3feeeb761742d808,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0x3c888f6ff06b979a, 0x3feee90c7a61d55b,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc76b8867f91c9d6, 0x3feee4559212ef89,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c94c9c0b5157fe6, 0x3feee20853c10f28,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0xbc62455345b51c8e, 0x3feedfc4976d27fa,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc93331de45477d0, 0x3feedd8a63b0a09b,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc95f84d39b39b16, 0x3feedb59bf29743f,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc72ba4dc7c4d562, 0x3feed932b07a35df,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc844f25dc02691f, 0x3feed7153e4a136a,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc888d328eb9b501, 0x3feed5016f44d8f5,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0xbc615f0a2b9cd452, 0x3feed0f6d5817663,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0xbc8c2e465a919e1d, 0x3feecf0018321a1a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc865dfd02bd08f1, 0x3feecd1318eb43ec,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0xbc8e68cec89b1762, 0x3feecb2fde7006f4,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc48ae858eb682ca, 0x3feec9566f8827d0,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c5dd71277c0915f, 0x3feec786d3001fe5,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c92001325ecd7fb, 0x3feec5c10fa920a1,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c65ace6e2870332, 0x3feec4052c5916c4,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc9595c55690ffaf, 0x3feec2532feaada6,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0xbc8b401ba9fb5199, 0x3feec0ab213d5283,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c6df82bf324cc57, 0x3feebf0d073537ca,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c97cae38641c7bb, 0x3feebd78e8bb586b,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc80b582d74a55d9, 0x3feeb8f8b804f127,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc592dca38593e20, 0x3feeb62b00da3b14,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc85daca9994833e, 0x3feeb4d359dfd53d,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc980b4321bc6dae, 0x3feeb385df598d78,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc8390afec5241c5, 0x3feeb24298571b06,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0xbc910aa91ae9b67f, 0x3feeafdac1351819,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c957e1b67462375, 0x3feeaeb63f4d854c,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c8124d5051552a7, 0x3feead9c0d59ca07,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc3ca103952ecf1f, 0x3feeac8c32824135,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c773345c02a4fd6, 0x3feeab86b5f43d92,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0xbc924f2cb4f81746, 0x3feea99af482fc8f,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc943592a0a9846b, 0x3feea8b4be135acc,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0xbc956bc85d444f4f, 0x3feea7d902d47c65,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0x3c914d1e4218319f, 0x3feea707ca0cbf0f,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0x3c971c93709313f4, 0x3feea6411b078d26,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c7f88303b60d222, 0x3feea584fd15612a,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0x3c70125ca18d4b5b, 0x3feea4d3778bc944,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c9592ea73798b11, 0x3feea42c91c56acd,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc9371d6d7d75739, 0x3feea390532205d8,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc8ac05fd996f807, 0x3feea2fec30678b7,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc91f5067d03653a, 0x3feea277e8dcc390,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c917339c86ce3ad, 0x3feea1fbcc140be7,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0xbc77e66065ba2500, 0x3feea18a7420a036,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c964c827ee6b49a, 0x3feea123e87bfb7a,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc928311a3c73480, 0x3feea0c830a4c8d4,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c882c79e185e981, 0x3feea077541ee718,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc8b48cea80b043b, 0x3feea0315a736c75,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0x3c954835dd4b7548, 0x3fee9fc62dea2f8a,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc8f652fde52775c, 0x3fee9f86e7ba9fef,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc7b0300defbcf98, 0x3fee9f77ce1303f6,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc89dab646035dc0, 0x3fee9f73c4eaa988,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc91f0c230588dde, 0x3fee9f7ad3ef9011,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0x3c9106450507a28c, 0x3fee9f8d02d50b8f,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc9129729a10f3a0, 0x3fee9faa5953c849,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0x3c781a70a5124f67, 0x3fee9fd2df29ce7c,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c941626ea62646d, 0x3feea0069c1a861d,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0xbc940b9f54365b7c, 0x3feea04597eeba8f,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c873455e0e826c1, 0x3feea08fda749e5d,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0x3c94f006ad874e3e, 0x3feea0e56b7fcf03,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc8f6d693d0973bb, 0x3feea14652e958aa,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0xbc88b25e045d207b, 0x3feea22a4456e7a3,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0xbc69cb3314060ca7, 0x3feea2ad5e2850ac,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9,
+0xbc760a3629969871, 0x3feea3878491c491,
+0x3c94aa7212bfa73c, 0x3feea3d5fbab091f,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc81e688272a8a12, 0x3feea47b8f4abaa9,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c8a1e274eed4476, 0x3feea5e968443d9a,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0x3c94a533a59324da, 0x3feea6b1bdadb46d,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0x3c7a56d2760d087d, 0x3feea785b91e07f1,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0x3c91682c1c6e8b05, 0x3feea86562ab00ec,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c89ea99cf7a9591, 0x3feea950c27004c2,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c909b176e05a9cd, 0x3feeab4ac52be8f7,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c711607f1952c95, 0x3feeac597875c644,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c869608f0f86431, 0x3feead74029db01e,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c81c1701c359530, 0x3feeb10afc931857,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc8edb1bf6809287, 0x3feeb2553499284b,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc93fc025e1db9ce, 0x3feeb50dad829e70,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc8d737c7d71382e, 0x3feeb67bff148396,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0x3c6ae88c43905293, 0x3feeb7f669e2802b,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc93d1f7661fe51b, 0x3feeb97cf65253d1,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0x3c651b68797ffc1c, 0x3feebb0faccf9243,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0x3c54579c5ceed70b, 0x3feebcae95cba768,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c87298413381667, 0x3feebe59b9bddb5b,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc905000be64e965, 0x3feec01121235681,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc89fb12e3454b73, 0x3feec1d4d47f2598,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c90622b15810eea, 0x3feec581414380f2,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3be9a5ecc875d327, 0x3feec76a0bcfc15e,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0x3c88ea486a3350ef, 0x3feec95f4499c647,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c89c31f7e38028b, 0x3feecd6f23701b15,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0xbc5fac13f4e005a3, 0x3feecf89dacfe68c,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0x3c7d8aced7162e89, 0x3feed1b1231475f7,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc848f50cea7269f, 0x3feed3e504f696b1,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c821eb9a08a0542, 0x3feed625893523d4,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0xbc6133a953131cfd, 0x3feed872b8950a73,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0xbc90260cf07cb311, 0x3feedd333beb0b7e,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c1bca400a7b939d, 0x3feedfa6a1897fd2,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0x3c9140bc34dfc19f, 0x3feee226d59a09ee,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0xbc8c9b1da461ab87, 0x3feee4b3e100301e,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c115f23ebea8e, 0x3feee74dcca5a413,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc6dcab99f23f84e, 0x3feee9f4a17a4735,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c915b1397075f04, 0x3feeef692a8fa8cd,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc86a510f31e13e6, 0x3feef511c43bbd62,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc92887ea88e7340, 0x3feef7f9ade433c6,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8d1bf10460dba0, 0x3fef01004b3a7804,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0x3c8e5d80813dddfc, 0x3fef041ce8e77680,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c7a77557fd62db3, 0x3fef0a7df9285775,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc651ba6128db749, 0x3fef0dc27e2cb5e5,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc7c0ffefdc5e251, 0x3fef111462c95b60,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c923759b8aca76d, 0x3fef17e06ff301f4,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc895498a73dac7d, 0x3fef1b5aab23e61e,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c851de924583108, 0x3fef1ee26b34e065,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0xbc8c5fe4051ba06c, 0x3fef2277b9881650,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc7af5c67c4e8235, 0x3fef29cb269e601f,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0xbc8304ef0045d575, 0x3fef2d89584661a1,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c8725f94f910375, 0x3fef31553dfa8313,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c7b53e99f9191e8, 0x3fef352ee13da7cb,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc810a79e6d7e2b8, 0x3fef39164b994d23,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0x3c549eeef9ec910c, 0x3fef410e9be12cb9,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8335827ffb9dce, 0x3fef451f95018d17,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c645563980ef762, 0x3fef493e7ba2c38c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0xbc8aab80ceab2b4a, 0x3fef51a638197a3c,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c83c119f18464c5, 0x3fef5a461eec14be,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0xbc72550d76be719a, 0x3fef631e7e2d479d,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0xbc82090274667d12, 0x3fef679ff37adb4a,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c890de9296f4cd1, 0x3fef70cd9ab294e4,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0x3c832ff9978b34bc, 0x3fef7579e065807d,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc7303b63dda1980, 0x3fef7a347f63c159,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0x3c768d9144ae12fc, 0x3fef83d4f11f8220,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c853687f542403b, 0x3fef88bad7dcee90,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0xbc736ed2de40b407, 0x3fef8daf3fe592e8,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc614ef56c770f3b, 0x3fef92b2334ac7ee,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c8df7d1353d8e88, 0x3fef97c3bc24e350,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc850bed64091b8a, 0x3fef9ce3e4933c7e,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0x3c89d852381c317f, 0x3fefa212b6bc3181,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc5a1f25ce94cae7, 0x3fefac9c80faa594,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c737e8ae802b851, 0x3fefb7616ca06dd6,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c875119560e34af, 0x3fefbcda28a52e59,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0xbc7431c3840929c6, 0x3fefc261cbdf5be7,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc8cb472d2e86b99, 0x3fefc7f860a70c22,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c8eef18336b62e3, 0x3fefd35288633625,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0x3c80d23f87b50a2a, 0x3fefd916302bd526,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c8302dee657c8e6, 0x3fefdee8f32a4b45,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0xbc7b0caa080df170, 0x3fefe4cadbdac61d,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c8e70b094fa075a, 0x3feff6cbe15f6314,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+0xbc72ec9a3e5d680a, 0x3feffceaca4391b6,
+#endif
+},
+};
diff --git a/pl/math/expf.c b/pl/math/expf.c
new file mode 100644
index 0000000..c325e45
--- /dev/null
+++ b/pl/math/expf.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+EXPF_TABLE_BITS = 5
+EXPF_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXPF_TABLE_BITS)
+#define InvLn2N __expf_data.invln2_scaled
+#define T __expf_data.tab
+#define C __expf_data.poly_scaled
+
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20; /* Discard the low 20 bits of asuint (x). */
+}
+
+float
+optr_aor_exp_f32 (float x)
+{
+ uint32_t abstop;
+ uint64_t ki, t;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, xd, z, r, r2, y, s;
+
+ xd = (double_t) x;
+ abstop = top12 (x) & 0x7ff;
+ if (unlikely (abstop >= top12 (88.0f)))
+ {
+ /* |x| >= 88, or x is inf or nan. */
+ if (asuint (x) == asuint (-INFINITY)) /* x == -inf. */
+ return 0.0f;
+ if (abstop >= top12 (INFINITY)) /* +inf or nan: return x + x. */
+ return x + x;
+ if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+ return __math_oflowf (0);
+ if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+ return __math_uflowf (0);
+ }
+
+ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
+ z = InvLn2N * xd;
+
+ /* Round and convert z to int, the result is in [-150*N, 128*N] and
+ ideally nearest int is used, otherwise the magnitude of r can be
+ bigger which gives larger approximation error. */
+ kd = roundtoint (z);
+ ki = converttoint (z);
+ r = z - kd;
+
+ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ t += ki << (52 - EXPF_TABLE_BITS);
+ s = asdouble (t); /* s is the 2^(k/N) factor of the formula above. */
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y;
+ y = y * s;
+ return eval_as_float (y);
+}
diff --git a/pl/math/expf_data.c b/pl/math/expf_data.c
new file mode 100644
index 0000000..474ad57
--- /dev/null
+++ b/pl/math/expf_data.c
@@ -0,0 +1,31 @@
+/*
+ * Coeffs and table entries for single-precision exp. Copied from
+ * math/exp2f_data.c, with EXP2F_TABLE_BITS == 5 (N == 32 table entries).
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << EXPF_TABLE_BITS)
+
+const struct expf_data __expf_data = {
+ /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+ used for computing 2^(k/N) for an int |k| < 150 N as
+ double(tab[k%N] + (k << 52-BITS)) */
+ .tab = {
+0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+ .invln2_scaled = 0x1.71547652b82fep+0 * N,
+ .poly_scaled = {
+ 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+ },
+};
diff --git a/pl/math/expm1_2u5.c b/pl/math/expm1_2u5.c
new file mode 100644
index 0000000..a3faff7
--- /dev/null
+++ b/pl/math/expm1_2u5.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "estrin.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define InvLn2 0x1.71547652b82fep0
+#define Ln2hi 0x1.62e42fefa39efp-1
+#define Ln2lo 0x1.abc9e3b39803fp-56
+#define Shift 0x1.8p52
+#define TinyBound \
+ 0x3cc0000000000000 /* 0x1p-51, below which expm1(x) is within 2 ULP of x. */
+#define BigBound 0x1.63108c75a1937p+9 /* Above which expm1(x) overflows. */
+#define NegBound -0x1.740bf7c0d927dp+9 /* Below which expm1(x) rounds to -1. */
+#define AbsMask 0x7fffffffffffffff
+
+#define C(i) __expm1_poly[i]
+
+/* Approximation for exp(x) - 1 using polynomial on a reduced interval.
+ The maximum observed error is 2.17 ULP:
+ expm1(0x1.63f90a866748dp-2) got 0x1.a9af56603878ap-2
+ want 0x1.a9af566038788p-2. */
+double
+expm1 (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ax = ix & AbsMask;
+
+ /* Tiny, +Infinity. */
+ if (ax <= TinyBound || ix == 0x7ff0000000000000)
+ return x;
+
+ /* +/-NaN. */
+ if (ax > 0x7ff0000000000000)
+ return __math_invalid (x);
+
+ /* Result is too large to be represented as a double. */
+ if (x >= BigBound)
+ return __math_oflow (0);
+
+ /* Result rounds to -1 in double precision. */
+ if (x <= NegBound)
+ return -1;
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ double j = fma (InvLn2, x, Shift) - Shift;
+ int64_t i = j;
+ double f = fma (j, -Ln2hi, x);
+ f = fma (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double p = fma (f2, ESTRIN_10 (f, f2, f4, f4 * f4, C), f);
+
+ /* Assemble the result, using a slight rearrangement to achieve acceptable
+ accuracy.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^(i - 1). */
+ double t = ldexp (0.5, i);
+ /* expm1(x) ~= 2 * (p * t + (t - 1/2)). */
+ return 2 * fma (p, t, t - 0.5);
+}
+
+PL_SIG (S, D, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (expm1, 1.68)
+PL_TEST_INTERVAL (expm1, 0, 0x1p-51, 1000)
+PL_TEST_INTERVAL (expm1, -0, -0x1p-51, 1000)
+PL_TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000)
+PL_TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000)
+PL_TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100)
+PL_TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100)
diff --git a/pl/math/expm1_data.c b/pl/math/expm1_data.c
new file mode 100644
index 0000000..ff7426b
--- /dev/null
+++ b/pl/math/expm1_data.c
@@ -0,0 +1,21 @@
+/*
+ * Coefficients for double-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Generated using fpminimax, see tools/expm1.sollya for details. */
+const double __expm1_poly[] = {0x1p-1,
+ 0x1.5555555555559p-3,
+ 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7,
+ 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13,
+ 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19,
+ 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26,
+ 0x1.1f143d060a28ap-29};
diff --git a/pl/math/expm1f_1u6.c b/pl/math/expm1f_1u6.c
new file mode 100644
index 0000000..70b14e4
--- /dev/null
+++ b/pl/math/expm1f_1u6.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "hornerf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Shift (0x1.8p23f)
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+#define AbsMask (0x7fffffff)
+#define InfLimit \
+ (0x1.644716p6) /* Smallest value of x for which expm1(x) overflows. */
+#define NegLimit \
+ (-0x1.9bbabcp+6) /* Largest value of x for which expm1(x) rounds to -1. */
+
+#define C(i) __expm1f_poly[i]
+
+/* Approximation for exp(x) - 1 using polynomial on a reduced interval.
+ The maximum error is 1.51 ULP:
+ expm1f(0x1.8baa96p-2) got 0x1.e2fb9p-2
+ want 0x1.e2fb94p-2. */
+float
+expm1f (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ax = ix & AbsMask;
+
+ /* Tiny: |x| <= 0x1p-23. expm1(x) is closely approximated by x.
+ Inf: x == +Inf => expm1(x) = x. */
+ if (ax <= 0x34000000 || (ix == 0x7f800000))
+ return x;
+
+ /* +/-NaN. */
+ if (ax > 0x7f800000)
+ return __math_invalidf (x);
+
+ if (x >= InfLimit) /* Result overflows. */
+ return __math_oflowf (0);
+
+ if (x <= NegLimit || ix == 0xff800000) /* Result rounds to -1, or x == -Inf. */
+ return -1;
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ float j = fmaf (InvLn2, x, Shift) - Shift;
+ int32_t i = j;
+ float f = fmaf (j, -Ln2hi, x);
+ f = fmaf (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ float p = fmaf (f * f, HORNER_4 (f, C), f);
+ /* Assemble the result, using a slight rearrangement to achieve acceptable
+ accuracy.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^(i - 1). */
+ float t = ldexpf (0.5f, i);
+ /* expm1(x) ~= 2 * (p * t + (t - 1/2)). */
+ return 2 * fmaf (p, t, t - 0.5f);
+}
+
+PL_SIG (S, F, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (expm1f, 1.02)
+PL_TEST_INTERVAL (expm1f, 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (expm1f, -0, -0x1p-23, 1000)
+PL_TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000)
+PL_TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000)
diff --git a/pl/math/expm1f_data.c b/pl/math/expm1f_data.c
new file mode 100644
index 0000000..9d02dc4
--- /dev/null
+++ b/pl/math/expm1f_data.c
@@ -0,0 +1,12 @@
+/*
+ * Coefficients for single-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Generated using fpminimax, see tools/expm1f.sollya for details. */
+const float __expm1f_poly[] = {0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
+ 0x1.12287cp-7, 0x1.6b55a2p-10};
diff --git a/pl/math/horner.h b/pl/math/horner.h
new file mode 100644
index 0000000..f92ab67
--- /dev/null
+++ b/pl/math/horner.h
@@ -0,0 +1,14 @@
+/*
+ * Helper macros for double-precision Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if V_SUPPORTED
+#define FMA v_fma_f64
+#else
+#define FMA fma
+#endif
+
+#include "horner_wrap.h"
diff --git a/pl/math/horner_wrap.h b/pl/math/horner_wrap.h
new file mode 100644
index 0000000..6478968
--- /dev/null
+++ b/pl/math/horner_wrap.h
@@ -0,0 +1,34 @@
+/*
+ * Helper macros for Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+// clang-format off
+#define HORNER_1_(x, c, i) FMA(c(i + 1), x, c(i))
+#define HORNER_2_(x, c, i) FMA(HORNER_1_ (x, c, i + 1), x, c(i))
+#define HORNER_3_(x, c, i) FMA(HORNER_2_ (x, c, i + 1), x, c(i))
+#define HORNER_4_(x, c, i) FMA(HORNER_3_ (x, c, i + 1), x, c(i))
+#define HORNER_5_(x, c, i) FMA(HORNER_4_ (x, c, i + 1), x, c(i))
+#define HORNER_6_(x, c, i) FMA(HORNER_5_ (x, c, i + 1), x, c(i))
+#define HORNER_7_(x, c, i) FMA(HORNER_6_ (x, c, i + 1), x, c(i))
+#define HORNER_8_(x, c, i) FMA(HORNER_7_ (x, c, i + 1), x, c(i))
+#define HORNER_9_(x, c, i) FMA(HORNER_8_ (x, c, i + 1), x, c(i))
+#define HORNER_10_(x, c, i) FMA(HORNER_9_ (x, c, i + 1), x, c(i))
+#define HORNER_11_(x, c, i) FMA(HORNER_10_(x, c, i + 1), x, c(i))
+#define HORNER_12_(x, c, i) FMA(HORNER_11_(x, c, i + 1), x, c(i))
+
+#define HORNER_1(x, c) HORNER_1_ (x, c, 0)
+#define HORNER_2(x, c) HORNER_2_ (x, c, 0)
+#define HORNER_3(x, c) HORNER_3_ (x, c, 0)
+#define HORNER_4(x, c) HORNER_4_ (x, c, 0)
+#define HORNER_5(x, c) HORNER_5_ (x, c, 0)
+#define HORNER_6(x, c) HORNER_6_ (x, c, 0)
+#define HORNER_7(x, c) HORNER_7_ (x, c, 0)
+#define HORNER_8(x, c) HORNER_8_ (x, c, 0)
+#define HORNER_9(x, c) HORNER_9_ (x, c, 0)
+#define HORNER_10(x, c) HORNER_10_(x, c, 0)
+#define HORNER_11(x, c) HORNER_11_(x, c, 0)
+#define HORNER_12(x, c) HORNER_12_(x, c, 0)
+// clang-format on
diff --git a/pl/math/hornerf.h b/pl/math/hornerf.h
new file mode 100644
index 0000000..0703817
--- /dev/null
+++ b/pl/math/hornerf.h
@@ -0,0 +1,14 @@
+/*
+ * Helper macros for single-precision Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if V_SUPPORTED
+#define FMA v_fma_f32
+#else
+#define FMA fmaf
+#endif
+
+#include "horner_wrap.h"
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
new file mode 100644
index 0000000..af5f9f9
--- /dev/null
+++ b/pl/math/include/mathlib.h
@@ -0,0 +1,244 @@
+// clang-format off
+/*
+ * Public API.
+ *
+ * Copyright (c) 2015-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATHLIB_H
+#define _MATHLIB_H
+
+float acoshf (float);
+float asinhf (float);
+float atan2f (float, float);
+float atanf (float);
+float atanhf (float);
+float cbrtf (float);
+float coshf (float);
+float erfcf (float);
+float erff (float);
+float expm1f (float);
+float log10f (float);
+float log1pf (float);
+float sinhf (float);
+float tanf (float);
+float tanhf (float);
+
+double acosh (double);
+double asinh (double);
+double atan (double);
+double atan2 (double, double);
+double atanh (double);
+double cbrt (double);
+double cosh (double);
+double erfc (double);
+double expm1 (double);
+double log10 (double);
+double log1p (double);
+double sinh (double);
+double tanh (double);
+
+float __s_acoshf (float);
+float __s_asinhf (float);
+float __s_atanf (float);
+float __s_atan2f (float, float);
+float __s_atanhf (float);
+float __s_cbrtf (float);
+float __s_coshf (float);
+float __s_erfcf (float);
+float __s_erff (float);
+float __s_expm1f (float);
+float __s_log10f (float);
+float __s_log1pf (float);
+float __s_log2f (float);
+float __s_sinhf (float);
+float __s_tanf (float);
+float __s_tanhf (float);
+
+double __s_acosh (double);
+double __s_asinh (double);
+double __s_atan (double);
+double __s_atan2 (double, double);
+double __s_atanh (double);
+double __s_cbrt (double);
+double __s_cosh (double);
+double __s_erf (double);
+double __s_erfc (double);
+double __s_expm1 (double);
+double __s_log10 (double);
+double __s_log1p (double);
+double __s_log2 (double);
+double __s_sinh (double);
+double __s_tan (double);
+double __s_tanh (double);
+
+#if __aarch64__
+#if __GNUC__ >= 5
+typedef __Float32x4_t __f32x4_t;
+typedef __Float64x2_t __f64x2_t;
+#elif __clang_major__*100+__clang_minor__ >= 305
+typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
+typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
+#else
+#error Unsupported compiler
+#endif
+
+/* Vector functions following the base PCS. */
+__f32x4_t __v_acoshf (__f32x4_t);
+__f64x2_t __v_acosh (__f64x2_t);
+__f32x4_t __v_asinhf (__f32x4_t);
+__f64x2_t __v_asinh (__f64x2_t);
+__f32x4_t __v_atanf (__f32x4_t);
+__f64x2_t __v_atan (__f64x2_t);
+__f32x4_t __v_atan2f (__f32x4_t, __f32x4_t);
+__f64x2_t __v_atan2 (__f64x2_t, __f64x2_t);
+__f32x4_t __v_atanhf (__f32x4_t);
+__f64x2_t __v_atanh (__f64x2_t);
+__f32x4_t __v_cbrtf (__f32x4_t);
+__f64x2_t __v_cbrt (__f64x2_t);
+__f32x4_t __v_coshf (__f32x4_t);
+__f64x2_t __v_cosh (__f64x2_t);
+__f32x4_t __v_erff (__f32x4_t);
+__f64x2_t __v_erf (__f64x2_t);
+__f32x4_t __v_erfcf (__f32x4_t);
+__f64x2_t __v_erfc (__f64x2_t);
+__f32x4_t __v_expm1f (__f32x4_t);
+__f64x2_t __v_expm1 (__f64x2_t);
+__f32x4_t __v_log10f (__f32x4_t);
+__f64x2_t __v_log10 (__f64x2_t);
+__f32x4_t __v_log1pf (__f32x4_t);
+__f64x2_t __v_log1p (__f64x2_t);
+__f32x4_t __v_log2f (__f32x4_t);
+__f64x2_t __v_log2 (__f64x2_t);
+__f32x4_t __v_sinhf (__f32x4_t);
+__f64x2_t __v_sinh (__f64x2_t);
+__f32x4_t __v_tanf (__f32x4_t);
+__f64x2_t __v_tan (__f64x2_t);
+__f32x4_t __v_tanhf (__f32x4_t);
+__f64x2_t __v_tanh (__f64x2_t);
+
+#if __GNUC__ >= 9 || __clang_major__ >= 8
+#define __vpcs __attribute__((__aarch64_vector_pcs__))
+
+/* Vector functions following the vector PCS. */
+__vpcs __f32x4_t __vn_acoshf (__f32x4_t);
+__vpcs __f64x2_t __vn_acosh (__f64x2_t);
+__vpcs __f32x4_t __vn_asinhf (__f32x4_t);
+__vpcs __f64x2_t __vn_asinh (__f64x2_t);
+__vpcs __f32x4_t __vn_atanf (__f32x4_t);
+__vpcs __f64x2_t __vn_atan (__f64x2_t);
+__vpcs __f32x4_t __vn_atan2f (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t __vn_atan2 (__f64x2_t, __f64x2_t);
+__vpcs __f32x4_t __vn_atanhf (__f32x4_t);
+__vpcs __f64x2_t __vn_atanh (__f64x2_t);
+__vpcs __f32x4_t __vn_cbrtf (__f32x4_t);
+__vpcs __f64x2_t __vn_cbrt (__f64x2_t);
+__vpcs __f32x4_t __vn_coshf (__f32x4_t);
+__vpcs __f64x2_t __vn_cosh (__f64x2_t);
+__vpcs __f32x4_t __vn_erff (__f32x4_t);
+__vpcs __f64x2_t __vn_erf (__f64x2_t);
+__vpcs __f32x4_t __vn_erfcf (__f32x4_t);
+__vpcs __f64x2_t __vn_erfc (__f64x2_t);
+__vpcs __f32x4_t __vn_expm1f (__f32x4_t);
+__vpcs __f64x2_t __vn_expm1 (__f64x2_t);
+__vpcs __f32x4_t __vn_log10f (__f32x4_t);
+__vpcs __f64x2_t __vn_log10 (__f64x2_t);
+__vpcs __f32x4_t __vn_log1pf (__f32x4_t);
+__vpcs __f64x2_t __vn_log1p (__f64x2_t);
+__vpcs __f32x4_t __vn_log2f (__f32x4_t);
+__vpcs __f64x2_t __vn_log2 (__f64x2_t);
+__vpcs __f32x4_t __vn_sinhf (__f32x4_t);
+__vpcs __f64x2_t __vn_sinh (__f64x2_t);
+__vpcs __f32x4_t __vn_tanf (__f32x4_t);
+__vpcs __f64x2_t __vn_tan (__f64x2_t);
+__vpcs __f32x4_t __vn_tanhf (__f32x4_t);
+__vpcs __f64x2_t __vn_tanh (__f64x2_t);
+
+/* Vector functions following the vector PCS using ABI names. */
+__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_expm1f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
+
+#endif
+
+#if WANT_SVE_MATH
+#include <arm_sve.h>
+svfloat32_t __sv_atan2f_x (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32_t __sv_atanf_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_atan_x (svfloat64_t, svbool_t);
+svfloat64_t __sv_atan2_x (svfloat64_t, svfloat64_t, svbool_t);
+svfloat32_t __sv_cosf_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_cos_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_erff_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_erf_x (svfloat64_t, svbool_t);
+svfloat64_t __sv_erfc_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_expf_x (svfloat32_t, svbool_t);
+svfloat32_t __sv_logf_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_log_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_log10f_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_log10_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_log2f_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_log2_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_powif_x (svfloat32_t, svint32_t, svbool_t);
+svfloat64_t __sv_powi_x (svfloat64_t, svint64_t, svbool_t);
+svfloat32_t __sv_sinf_x (svfloat32_t, svbool_t);
+svfloat64_t __sv_sin_x (svfloat64_t, svbool_t);
+svfloat32_t __sv_tanf_x (svfloat32_t, svbool_t);
+/* SVE ABI names. */
+svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxvv_powi(svfloat32_t, svint32_t, svbool_t);
+svfloat64_t _ZGVsMxvv_powk(svfloat64_t, svint64_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t);
+#endif
+
+#endif
+
+#endif
+// clang-format on
diff --git a/pl/math/include/pl_test.h b/pl/math/include/pl_test.h
new file mode 100644
index 0000000..6a81360
--- /dev/null
+++ b/pl/math/include/pl_test.h
@@ -0,0 +1,26 @@
+/*
+ * PL macros to aid testing. This version of this file is used for building the
+ * routine, not the tests. Separate definitions are found in test/pl_test.h
+ * which emit test parameters.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Emit max ULP threshold - silenced for building the routine. */
+#define PL_TEST_ULP(f, l)
+
+/* Emit alias. The PL_TEST_ALIAS declaration is piggy-backed on top of
+ strong_alias. Use PL_ALIAS instead of strong_alias to make sure the alias is
+ also added to the test suite. */
+#define PL_ALIAS(a, b) strong_alias (a, b)
+
+/* Emit routine name if e == 1 and f is expected to correctly trigger fenv
+ exceptions. e allows declaration to be emitted conditionally upon certain
+ build flags - defer expansion by one pass to allow those flags to be expanded
+ properly. */
+#define PL_TEST_EXPECT_FENV(f, e) /* Expands to nothing in this routine-build version of the header. */
+#define PL_TEST_EXPECT_FENV_ALWAYS(f) /* Likewise expands to nothing here. */
+
+#define PL_TEST_INTERVAL(f, lo, hi, n) /* Test-interval declaration for f over lo..hi - expands to nothing when building the routine. */
+#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) /* Variant taking an extra argument c - also expands to nothing here. */
diff --git a/pl/math/log.c b/pl/math/log.c
new file mode 100644
index 0000000..40b0441
--- /dev/null
+++ b/pl/math/log.c
@@ -0,0 +1,161 @@
+/*
+ * Double-precision log(x) function.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define T __log_data.tab
+#define T2 __log_data.tab2
+#define B __log_data.poly1
+#define A __log_data.poly
+#define Ln2hi __log_data.ln2hi
+#define Ln2lo __log_data.ln2lo
+#define N (1 << LOG_TABLE_BITS)
+#define OFF 0x3fe6000000000000
+
+/* Top 16 bits of asuint64 (x); the caller tests bit 15 (0x8000) as the sign and the 0x7ff0 field as the exponent. */
+static inline uint32_t
+top16 (double x)
+{
+ return asuint64 (x) >> 48;
+}
+
+double
+optr_aor_log_f64 (double x)
+{
+ /* Computes log(x). double_t is used for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
+
+ ix = asuint64 (x);
+ top = top16 (x);
+
+#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
+#define LO asuint64 (1.0 - 0x1p-5)
+#define HI asuint64 (1.0 + 0x1.1p-5)
+#elif LOG_POLY1_ORDER == 12
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
+#endif
+ if (unlikely (ix - LO < HI - LO)) /* Unsigned wrap-around turns LO <= ix < HI into a single comparison. */
+ {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+ return 0;
+ r = x - 1.0;
+ r2 = r * r;
+ r3 = r * r2;
+#if LOG_POLY1_ORDER == 10
+ /* Worst-case error is around 0.516 ULP. */
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#elif LOG_POLY1_ORDER == 11
+ /* Worst-case error is around 0.516 ULP. */
+ y = r3
+ * (B[1] + r * B[2]
+ + r2
+ * (B[3] + r * B[4] + r2 * B[5]
+ + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#elif LOG_POLY1_ORDER == 12
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3
+ * (B[4] + r * B[5] + r2 * B[6]
+ + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+#if N <= 64
+ /* Worst-case error is around 0.532 ULP. */
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#else
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w; /* Split r = rhi + rlo, where rhi keeps only the leading bits of r. */
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0]; /* B[0] == -0.5. */
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r); /* Adds B[0] * (r * r - rhi * rhi), the part of B[0] * r^2 not in w. */
+ y += lo;
+ y += hi;
+#endif
+#endif
+ return eval_as_double (y);
+ }
+ if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+ {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0) /* x is +0 or -0: doubling discards the sign bit. */
+ return __math_divzero (1);
+ if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) /* Sign bit set, or exponent field all ones (+inf was returned above). */
+ return __math_invalid (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64 (x * 0x1p52);
+ ix -= 52ULL << 52; /* Undo the 2^52 scaling in the exponent field. */
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+ k = (int64_t) tmp >> 52; /* arithmetic shift */
+ iz = ix - (tmp & 0xfffULL << 52); /* Subtract k from the exponent field, leaving the encoding of z. */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if HAVE_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = fma (z, invc, -1.0);
+#else
+ /* rounding error: 0x1p-55/N + 0x1p-66. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+ kd = (double_t) k;
+
+ /* hi + lo = r + log(c) + k*Ln2. */
+ w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
+
+ /* log(x) = lo + (log1p(r) - r) + hi. */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+ /* Worst case error if |y| > 0x1p-5:
+ 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+ Worst case error if |y| > 0x1p-4:
+ 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
+#if LOG_POLY_ORDER == 6
+ y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+#elif LOG_POLY_ORDER == 7
+ y = lo
+ + r2
+ * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ + r2 * r2 * (A[4] + r * A[5]))
+ + hi;
+#endif
+ return eval_as_double (y);
+}
diff --git a/pl/math/log10_2u.c b/pl/math/log10_2u.c
new file mode 100644
index 0000000..74828ea
--- /dev/null
+++ b/pl/math/log10_2u.c
@@ -0,0 +1,150 @@
+/*
+ * Double-precision log10(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Polynomial coefficients and lookup tables. */
+#define T __log10_data.tab
+#define T2 __log10_data.tab2
+#define B __log10_data.poly1
+#define A __log10_data.poly
+#define Ln2hi __log10_data.ln2hi
+#define Ln2lo __log10_data.ln2lo
+#define InvLn10 __log10_data.invln10
+#define N (1 << LOG10_TABLE_BITS)
+#define OFF 0x3fe6000000000000
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
+
+/* Top 16 bits of asuint64 (x); log10 tests bit 15 (0x8000) as the sign and the 0x7ff0 field as the exponent. */
+static inline uint32_t
+top16 (double x)
+{
+ return asuint64 (x) >> 48;
+}
+
+/* Fast and low accuracy implementation of log10.
+ The implementation is similar to that of math/log, except that:
+ - The polynomials approximate log(1+r), with r on the same intervals as log.
+ - The final sum is scaled (at runtime) by 1/ln(10) to switch to base 10.
+ Many errors above 1.59 ulp are observed across the whole range of doubles.
+ The greatest observed error is 1.61 ulp, at around 0.965:
+ log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
+ want -0x1.fee26884905a8p-6. */
+double
+log10 (double x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
+
+ ix = asuint64 (x);
+ top = top16 (x);
+
+ if (unlikely (ix - LO < HI - LO)) /* Unsigned wrap-around turns LO <= ix < HI into a single comparison. */
+ {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+ return 0;
+ r = x - 1.0;
+ r2 = r * r;
+ r3 = r * r2;
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3
+ * (B[4] + r * B[5] + r2 * B[6]
+ + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w; /* Split r = rhi + rlo, where rhi keeps only the leading bits of r. */
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0];
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r); /* Adds B[0] * (r * r - rhi * rhi), the part of B[0] * r^2 not in w. */
+ y += lo;
+ y += hi;
+ /* y is the natural-log sum (r + polynomial); scale it by 1/ln(10) to obtain log10. */
+ y = y * InvLn10;
+
+ return eval_as_double (y);
+ }
+ if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+ {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0) /* x is +0 or -0: doubling discards the sign bit. */
+ return __math_divzero (1);
+ if (ix == asuint64 (INFINITY)) /* log10(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) /* Sign bit set, or exponent field all ones (+inf was returned above). */
+ return __math_invalid (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64 (x * 0x1p52);
+ ix -= 52ULL << 52; /* Undo the 2^52 scaling in the exponent field. */
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG10_TABLE_BITS)) % N;
+ k = (int64_t) tmp >> 52; /* arithmetic shift. */
+ iz = ix - (tmp & 0xfffULL << 52); /* Subtract k from the exponent field, leaving the encoding of z. */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if HAVE_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = fma (z, invc, -1.0);
+#else
+ /* rounding error: 0x1p-55/N + 0x1p-66. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+ kd = (double_t) k;
+
+ /* w = log(c) + k*Ln2hi. */
+ w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
+
+ /* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)). */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+
+ /* Sum the natural-log terms (hi, lo and the polynomial in r), then scale by 1/ln(10). */
+ y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+ y = y * InvLn10;
+
+ return eval_as_double (y);
+}
+
+// clang-format off
+#if USE_GLIBC_ABI
+strong_alias (log10, __log10_finite)
+hidden_alias (log10, __ieee754_log10)
+#if LDBL_MANT_DIG == 53
+long double
+log10l (long double x)
+{
+ return log10 (x); /* Only built when LDBL_MANT_DIG == 53, so the double routine is reused for long double. */
+}
+#endif
+#endif
+// clang-format on
+
+PL_SIG (S, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (log10, 1.11)
+PL_TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000)
+PL_TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000)
+PL_TEST_INTERVAL (log10, 0, inf, 40000)
diff --git a/pl/math/log10_data.c b/pl/math/log10_data.c
new file mode 100644
index 0000000..9976f19
--- /dev/null
+++ b/pl/math/log10_data.c
@@ -0,0 +1,337 @@
+/*
+ * Data for log10.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG10_TABLE_BITS)
+
+const struct log10_data __log10_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.invln10 = 0x1.bcb7b1526e50ep-2,
+.poly1 = {
+#if LOG10_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+#endif
+},
+.poly = {
+#if N == 128 && LOG10_POLY_ORDER == 6
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+#endif
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [0x1.6p-1; 0x1.6p0) which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log(c)
+ tab2[i].chi = (double)c
+ tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p9 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-66 and
+ 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+#if N == 128
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+#if N == 128
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+#endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
diff --git a/pl/math/log10f.c b/pl/math/log10f.c
new file mode 100644
index 0000000..5c80008
--- /dev/null
+++ b/pl/math/log10f.c
@@ -0,0 +1,97 @@
+/*
+ * Single-precision log10 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Data associated to logf:
+
+ LOGF_TABLE_BITS = 4
+ LOGF_POLY_ORDER = 4
+
+ ULP error: 0.818 (nearest rounding.)
+ Relative error: 1.957 * 2^-26 (before rounding.). */
+
+/* Shorthands for the logf data shared with log10f: T is the lookup table
+ whose entries supply invc and logc, A holds the polynomial coefficients,
+ Ln2 and InvLn10 are the constants stored in the ln2 and invln10 fields. */
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define InvLn10 __logf_data.invln10
+/* Number of subintervals the reduced range is split into (table index is
+ taken modulo N). */
+#define N (1 << LOGF_TABLE_BITS)
+/* Bit pattern of the lower bound of the reduced range [OFF, 2*OFF];
+ 0x3f330000 is the encoding of 0x1.66p-1f. */
+#define OFF 0x3f330000
+
+/* Single-precision base-10 logarithm.
+
+ This naive implementation of log10f mimics that of logf (same data table,
+ range reduction and polynomial), then simply scales the result by 1/log(10)
+ to switch from base e to base 10. Hence, most computations are carried out
+ in double precision. Scaling before rounding to single precision is both
+ faster and more accurate.
+
+ Special cases, as handled below:
+ x == +/-0 returns __math_divzerof (1);
+ x == +Inf returns x;
+ x < 0 or x is NaN returns __math_invalidf (x);
+ x == 1 returns 0 directly when WANT_ROUNDING is set.
+
+ ULP error: 0.797 ulp (nearest rounding.). */
+float
+log10f (float x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
+
+ ix = asuint (x);
+#if WANT_ROUNDING
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (unlikely (ix == 0x3f800000))
+ return 0;
+#endif
+ /* One unsigned comparison catches every bit pattern outside the positive
+ normal range: zero/subnormal patterns wrap around on the subtraction,
+ and inf, nan and all negative inputs compare above the bound. */
+ if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+ {
+ /* x < 0x1p-126 or inf or nan. */
+ /* ix * 2 discards the sign bit, so this matches both +0 and -0. */
+ if (ix * 2 == 0)
+ return __math_divzerof (1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ /* Sign bit set (negative input) or exponent field all ones (nan). */
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf (x);
+ /* x is subnormal, normalize it. */
+ /* Scale by 2^23, then take 23 back out of the exponent field so that k
+ below still reflects the original magnitude. */
+ ix = asuint (x * 0x1p23f);
+ ix -= 23 << 23;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ /* Top LOGF_TABLE_BITS mantissa bits of tmp select the table entry. */
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t) tmp >> 23; /* arithmetic shift. */
+ /* Subtract the sign and exponent bits of tmp (i.e. k << 23) from ix,
+ leaving the bit pattern of z. */
+ iz = ix - (tmp & 0xff800000);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t) asfloat (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ r = z * invc - 1;
+ y0 = logc + (double_t) k * Ln2;
+
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ /* Overall: y = y0 + r + r^2 * (A[2] + A[1]*r + A[0]*r^2). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r);
+
+ /* Multiply by 1/log(10). */
+ y = y * InvLn10;
+
+ return eval_as_float (y);
+}
+
+/* Signature and test registration for log10f, via the PL_SIG and PL_TEST_*
+ macros from pl_sig.h and pl_test.h.
+ NOTE(review): the macro definitions are not visible in this file. The
+ PL_TEST_INTERVAL arguments look like (routine, interval start, interval
+ end, number of samples) and PL_TEST_ULP looks like an error threshold -
+ confirm against pl_test.h. */
+PL_SIG (S, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (log10f, 0.30)
+PL_TEST_INTERVAL (log10f, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000)
+PL_TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000)
+PL_TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000)
+PL_TEST_INTERVAL (log10f, 0, inf, 50000)
diff --git a/pl/math/log1p_2u.c b/pl/math/log1p_2u.c
new file mode 100644
index 0000000..23c8ed4
--- /dev/null
+++ b/pl/math/log1p_2u.c
@@ -0,0 +1,136 @@
+/*
+ * Double-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "estrin.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* ln(2) split into a high part and a low correction term; log1p applies
+ them to k in two separate fma steps. */
+#define Ln2Hi 0x1.62e42fefa3800p-1
+#define Ln2Lo 0x1.ef35793c76730p-45
+#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */
+#define OneMHfRt2Top \
+ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */
+/* Top 12 bits of asuint64(1), i.e. the biased exponent of 1.0. */
+#define OneTop12 0x3ff
+/* Selects the low 32 bits of a 64-bit pattern. */
+#define BottomMask 0xffffffff
+/* asuint64(1 - sqrt(2)/2) and asuint64(sqrt(2) - 1): magnitude bounds of the
+ interval [sqrt(2)/2 - 1, sqrt(2) - 1] on which log1p evaluates the
+ polynomial directly. */
+#define OneMHfRt2 0x3fd2bec333018866
+#define Rt2MOne 0x3fda827999fcef32
+/* Clears the sign bit of a double-precision bit pattern. */
+#define AbsMask 0x7fffffffffffffff
+/* Top 16 bits of asuint64(0x1p-63); compared against the top 16 bits of
+ |x| to detect tiny inputs. */
+#define ExpM63 0x3c00
+/* i-th polynomial coefficient. */
+#define C(i) __log1p_data.coeffs[i]
+
+/* Evaluate the log1p polynomial at f with the ESTRIN_18 scheme, reading the
+ coefficients through the C(i) accessor. The powers of f required by the
+ scheme are built by repeated squaring. */
+static inline double
+eval_poly (double f)
+{
+  double x2 = f * f;
+  double x4 = x2 * x2;
+  double x8 = x4 * x4;
+  return ESTRIN_18 (f, x2, x4, x8, x8 * x8, C);
+}
+
+/* Double-precision log(1+x), approximated with a polynomial on a reduced
+ interval.
+
+ Special cases, as handled below:
+ x == -0 or x == +Inf returns x;
+ x == -1 returns __math_divzero (-1);
+ x is NaN returns __math_invalid (|x|);
+ x < -1 (including -Inf) returns __math_invalid (x);
+ tiny x (top 16 bits of |x| <= ExpM63) returns x.
+
+ Largest observed errors are near the lower boundary of the region where k
+ is 0.
+ Maximum measured error: 1.75ULP.
+ log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
+ want -0x1.65fb8659a2f92p-2. */
+double
+log1p (double x)
+{
+  uint64_t ix = asuint64 (x);
+  uint64_t ia = ix & AbsMask;
+  uint32_t ia16 = ia >> 48;
+
+  /* Handle special cases first. */
+  if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
+                || ix == 0x8000000000000000))
+    {
+      if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
+        {
+          /* x == -0 => log1p(x) = -0.
+             x == Inf => log1p(x) = Inf. */
+          return x;
+        }
+      if (ix == 0xbff0000000000000)
+        {
+          /* x == -1 => log1p(x) = -Inf. */
+          return __math_divzero (-1);
+        }
+      if (ia16 >= 0x7ff0)
+        {
+          /* x == +/-NaN => log1p(x) = NaN. The sign bit is cleared before
+             the value is handed to the helper. */
+          return __math_invalid (asdouble (ia));
+        }
+      /* x < -1 => log1p(x) = NaN.
+         x == -Inf => log1p(x) = NaN. */
+      return __math_invalid (x);
+    }
+
+  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that t is
+     in [sqrt(2)/2, sqrt(2)], i.e. f is in [sqrt(2)/2 - 1, sqrt(2) - 1]):
+     log1p(x) = k*log(2) + log1p(f).
+
+     f may not be representable exactly, so we need a correction term:
+     let m = round(1 + x), c = (1 + x) - m.
+     c << m: at very small x, log1p(x) ~ x, hence:
+     log(1+x) - log(m) ~ c/m.
+
+     We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
+
+  uint64_t sign = ix & ~AbsMask;
+  if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
+    {
+      if (unlikely (ia16 <= ExpM63))
+        {
+          /* If exponent of x <= -63 then shortcut the polynomial and avoid
+             underflow by just returning x, which is exactly rounded in this
+             region. */
+          return x;
+        }
+      /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
+         logic below, as k = 0 and f = x and therefore representable exactly.
+         All we need is to return the polynomial. */
+      return fma (x, eval_poly (x) * x, x);
+    }
+
+  /* Obtain correctly scaled k by manipulation in the exponent. */
+  double m = x + 1;
+  uint64_t mi = asuint64 (m);
+  uint32_t u = (mi >> 32) + OneMHfRt2Top;
+  int32_t k = (int32_t) (u >> 20) - OneTop12;
+
+  /* Correction term c/m. */
+  double cm = (x - (m - 1)) / m;
+
+  /* Rebuild m with its exponent adjusted so that t lies in
+     [sqrt(2)/2, sqrt(2)], then subtract 1 to obtain f. */
+  uint32_t utop = (u & 0x000fffff) + HfRt2Top;
+  uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
+  double f = asdouble (u_red) - 1;
+
+  /* Approximate log1p(x) on the reduced input using a polynomial. Because
+     log1p(0)=0 we choose an approximation of the form:
+     x + C0*x^2 + C1*x^3 + C2x^4 + ...
+     Hence approximation has the form f + f^2 * P(f)
+     where P(x) = C0 + C1*x + C2x^2 + ... */
+  double p = fma (f, eval_poly (f) * f, f);
+
+  /* Assemble k*log(2) + log1p(f) + c/m, adding the low part of log(2) and
+     the correction term first. */
+  double kd = k;
+  double y = fma (Ln2Lo, kd, cm);
+  return y + fma (Ln2Hi, kd, p);
+}
+
+/* Signature and test registration for log1p, via the PL_SIG and PL_TEST_*
+ macros from pl_sig.h and pl_test.h.
+ NOTE(review): the macro definitions are not visible in this file. The
+ PL_TEST_INTERVAL arguments look like (routine, interval start, interval
+ end, number of samples) and PL_TEST_ULP looks like an error threshold -
+ confirm against pl_test.h. */
+PL_SIG (S, D, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (log1p, 1.26)
+PL_TEST_INTERVAL (log1p, -10.0, 10.0, 10000)
+PL_TEST_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
+PL_TEST_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
+PL_TEST_INTERVAL (log1p, 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (log1p, 0.0, -0x1p-23, 50000)
+PL_TEST_INTERVAL (log1p, -0x1p-23, -0.001, 50000)
+PL_TEST_INTERVAL (log1p, -0.001, -1.0, 50000)
+PL_TEST_INTERVAL (log1p, -1.0, inf, 5000)
diff --git a/pl/math/log1p_data.c b/pl/math/log1p_data.c
new file mode 100644
index 0000000..6168a0c
--- /dev/null
+++ b/pl/math/log1p_data.c
@@ -0,0 +1,19 @@
+/*
+ * Data used in double-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients generated using Remez algorithm, see
+ log1p.sollya for details.
+
+ The table holds 19 coefficients, lowest order first; the first entry is
+ approximately -1/2.
+ NOTE(review): presumably these are the C0..C18 of the f + f^2 * P(f) form
+ used by the double-precision log1p routine - confirm against its
+ evaluation macro. */
+const struct log1p_data __log1p_data = {
+ .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+ 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+ -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+ 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+ -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+ 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+ -0x1.cfa7385bdb37ep-6}};
diff --git a/pl/math/log1pf_2u1.c b/pl/math/log1pf_2u1.c
new file mode 100644
index 0000000..fcfd05a
--- /dev/null
+++ b/pl/math/log1pf_2u1.c
@@ -0,0 +1,165 @@
+/*
+ * Single-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "hornerf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Single-precision ln(2), applied to the exponent-derived scale at the end
+ of log1pf. */
+#define Ln2 (0x1.62e43p-1f)
+/* Sign bit of a single-precision bit pattern. */
+#define SignMask (0x80000000)
+
+/* Biased exponent of the largest float m for which m^8 underflows. */
+#define M8UFLOW_BOUND_BEXP 112
+/* Biased exponent of the largest float for which we just return x. */
+#define TINY_BOUND_BEXP 103
+
+/* i-th polynomial coefficient from the log1pf data table. */
+#define C(i) __log1pf_data.coeffs[i]
+
+/* Evaluate the polynomial approximation of log(1+m) at m.
+
+ Exactly one variant is compiled, selected by LOG1PF_2U5 or LOG1PF_1U3; if
+ neither is defined the build stops with an #error.
+
+ m: reduced argument.
+ e: biased exponent of m. Only the LOG1PF_2U5 variant reads it, to skip the
+ m^8 term when e < M8UFLOW_BOUND_BEXP. */
+static inline float
+eval_poly (float m, uint32_t e)
+{
+#ifdef LOG1PF_2U5
+
+ /* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
+ slightly modified Estrin scheme (no x^0 term, and x term is just x). */
+ /* Pairwise terms C(i) + C(i+1) * m. */
+ float p_12 = fmaf (m, C (1), C (0));
+ float p_34 = fmaf (m, C (3), C (2));
+ float p_56 = fmaf (m, C (5), C (4));
+ float p_78 = fmaf (m, C (7), C (6));
+
+ /* p_02 = m + m^2 * p_12 carries the bare x term of the approximation. */
+ float m2 = m * m;
+ float p_02 = fmaf (m2, p_12, m);
+ float p_36 = fmaf (m2, p_56, p_34);
+ float p_79 = fmaf (m2, C (8), p_78);
+
+ float m4 = m2 * m2;
+ float p_06 = fmaf (m4, p_36, p_02);
+
+ /* For small enough exponents the m^8 term is dropped entirely, so m8 is
+ never computed. */
+ if (unlikely (e < M8UFLOW_BOUND_BEXP))
+ return p_06;
+
+ float m8 = m4 * m4;
+ return fmaf (m8, p_79, p_06);
+
+#elif defined(LOG1PF_1U3)
+
+ /* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
+ scheme. Our polynomial approximation for log1p has the form
+ x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
+ Hence approximation has the form m + m^2 * P(m)
+ where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
+ return fmaf (m, m * HORNER_8 (m, C), m);
+
+#else
+#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
+#endif
+}
+
+/* Return the 8-bit biased exponent field (bits 23..30) of the
+ single-precision bit pattern ix; the sign bit is discarded. */
+static inline uint32_t
+biased_exponent (uint32_t ix)
+{
+  uint32_t shifted = ix >> 23;
+  return shifted & 0xff;
+}
+
+/* Single-precision log(1+x), approximated with a polynomial on a reduced
+ interval.
+
+ Special cases, as handled below:
+ biased exponent of x <= TINY_BOUND_BEXP (incl. +/-0) returns x;
+ x == +Inf returns x;
+ x == -1 returns __math_divzerof (-1);
+ x is NaN returns __math_invalidf (|x|);
+ x < -1 (including -Inf) returns __math_invalidf (x).
+
+ Worst-case error when using Estrin is roughly 2.02 ULP:
+ log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
+float
+log1pf (float x)
+{
+  uint32_t ix = asuint (x);
+  uint32_t ia = ix & ~SignMask;
+  uint32_t ia12 = ia >> 20;
+  uint32_t e = biased_exponent (ix);
+
+  /* Handle special cases first. */
+  if (unlikely (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
+                || e <= TINY_BOUND_BEXP))
+    {
+      if (ix == 0xff800000)
+        {
+          /* x == -Inf => log1pf(x) = NaN. Route it through the same helper
+             as every other x < -1 input instead of returning a bare NAN,
+             so that all out-of-domain inputs are reported the same way. */
+          return __math_invalidf (x);
+        }
+      if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
+        {
+          /* |x| < TinyBound => log1p(x) = x.
+             x == Inf => log1pf(x) = Inf. */
+          return x;
+        }
+      if (ix == 0xbf800000)
+        {
+          /* x == -1.0 => log1pf(x) = -Inf. */
+          return __math_divzerof (-1);
+        }
+      if (ia12 >= 0x7f8)
+        {
+          /* x == +/-NaN => log1pf(x) = NaN. The sign bit is cleared before
+             the value is handed to the helper. */
+          return __math_invalidf (asfloat (ia));
+        }
+      /* x < -1.0 => log1pf(x) = NaN. */
+      return __math_invalidf (x);
+    }
+
+  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+     is in [-0.25, 0.5]):
+     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+     We approximate log1p(m) with a polynomial, then scale by
+     k*log(2). Instead of doing this directly, we use an intermediate
+     scale factor s = 4*k*log(2) to ensure the scale is representable
+     as a normalised fp32 number. */
+
+  if (ix <= 0x3f000000 || ia <= 0x3e800000)
+    {
+      /* If x is in [-0.25, 0.5] then we can shortcut all the logic
+         below, as k = 0 and m = x. All we need is to return the
+         polynomial. */
+      return eval_poly (x, e);
+    }
+
+  float m = x + 1.0f;
+
+  /* k is used to scale the input. 0x3f400000 is chosen as we are trying to
+     reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
+     Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
+     let k = sign * 2^p where sign = -1 if x < 0
+     1 otherwise
+     and p is a negative integer whose magnitude increases with the
+     magnitude of x. */
+  int k = (asuint (m) - 0x3f400000) & 0xff800000;
+
+  /* By using integer arithmetic, we obtain the necessary scaling by
+     subtracting the unbiased exponent of k from the exponent of x. */
+  float m_scale = asfloat (asuint (x) - k);
+
+  /* Scale up to ensure that the scale factor is representable as normalised
+     fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
+  float s = asfloat (asuint (4.0f) - k);
+  m_scale = m_scale + fmaf (0.25f, s, -1.0f);
+
+  float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
+
+  /* The scale factor to be applied back at the end - by multiplying float(k)
+     by 2^-23 we get the unbiased exponent of k. */
+  float scale_back = (float) k * 0x1.0p-23f;
+
+  /* Apply the scaling back. */
+  return fmaf (scale_back, Ln2, p);
+}
+
+/* Signature and test registration for log1pf, via the PL_SIG and PL_TEST_*
+ macros from pl_sig.h and pl_test.h.
+ NOTE(review): the macro definitions are not visible in this file. The
+ PL_TEST_INTERVAL arguments look like (routine, interval start, interval
+ end, number of samples) and PL_TEST_ULP looks like an error threshold -
+ confirm against pl_test.h. */
+PL_SIG (S, F, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (log1pf, 1.52)
+PL_TEST_INTERVAL (log1pf, -10.0, 10.0, 10000)
+PL_TEST_INTERVAL (log1pf, 0.0, 0x1p-23, 50000)
+PL_TEST_INTERVAL (log1pf, 0x1p-23, 0.001, 50000)
+PL_TEST_INTERVAL (log1pf, 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (log1pf, 0.0, -0x1p-23, 50000)
+PL_TEST_INTERVAL (log1pf, -0x1p-23, -0.001, 50000)
+PL_TEST_INTERVAL (log1pf, -0.001, -1.0, 50000)
+PL_TEST_INTERVAL (log1pf, -1.0, inf, 5000)
diff --git a/pl/math/log1pf_data.c b/pl/math/log1pf_data.c
new file mode 100644
index 0000000..8c92d57
--- /dev/null
+++ b/pl/math/log1pf_data.c
@@ -0,0 +1,14 @@
+/*
+ * Data used in single-precision log1p(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+
+/* Polynomial coefficients generated using floating-point minimax
+ algorithm, see tools/log1pf.sollya for details.
+
+ The table holds 9 coefficients, lowest order first; the first entry is
+ exactly -1/2.
+ NOTE(review): presumably the first entry multiplies x^2, since the
+ single-precision log1p routine adds the linear term separately - confirm
+ against its polynomial evaluation. */
+const struct log1pf_data __log1pf_data
+ = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+ -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
+ -0x1.6f0d5ep-5f}};
diff --git a/pl/math/log_data.c b/pl/math/log_data.c
new file mode 100644
index 0000000..34715e5
--- /dev/null
+++ b/pl/math/log_data.c
@@ -0,0 +1,511 @@
+/*
+ * Data for log.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG_TABLE_BITS)
+
+const struct log_data __log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly1 = {
+#if LOG_POLY1_ORDER == 10
+// relative error: 0x1.32eccc6p-62
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.55555555554e5p-2,
+-0x1.0000000000af2p-2,
+0x1.9999999bbe436p-3,
+-0x1.55555537f9cdep-3,
+0x1.24922fc8127cfp-3,
+-0x1.0000b7d6bb612p-3,
+0x1.c806ee1ddbcafp-4,
+-0x1.972335a9c2d6ep-4,
+#elif LOG_POLY1_ORDER == 11
+// relative error: 0x1.52c8b708p-68
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.5555555555555p-2,
+-0x1.ffffffffffea9p-3,
+0x1.999999999c4d4p-3,
+-0x1.55555557f5541p-3,
+0x1.249248fbe33e4p-3,
+-0x1.ffffc9a3c825bp-4,
+0x1.c71e1f204435dp-4,
+-0x1.9a7f26377d06ep-4,
+0x1.71c30cf8f7364p-4,
+#elif LOG_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+#endif
+},
+.poly = {
+#if N == 64 && LOG_POLY_ORDER == 7
+// relative error: 0x1.906eb8ap-58
+// abs error: 0x1.d2cad5a8p-67
+// in -0x1.fp-8 0x1.fp-8
+-0x1.0000000000027p-1,
+0x1.555555555556ap-2,
+-0x1.fffffff0440bap-3,
+0x1.99999991906c3p-3,
+-0x1.555c8d7e8201ep-3,
+0x1.24978c59151fap-3,
+#elif N == 128 && LOG_POLY_ORDER == 6
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+#elif N == 128 && LOG_POLY_ORDER == 7
+// relative error: 0x1.649fc4bp-64
+// abs error: 0x1.c3b5769p-74
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.5555555555556p-2,
+-0x1.fffffffea1a8p-3,
+0x1.99999998e9139p-3,
+-0x1.555776801b968p-3,
+0x1.2493c29331a5cp-3,
+#endif
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log(c)
+ tab2[i].chi = (double)c
+ tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p9 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-66 and
+ 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+#if N == 64
+{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
+{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
+{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
+{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
+{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
+{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
+{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
+{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
+{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
+{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
+{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
+{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
+{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
+{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
+{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
+{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
+{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
+{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
+{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
+{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
+{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
+{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
+{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
+{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
+{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
+{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
+{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
+{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
+{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
+{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
+{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
+{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
+{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
+{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
+{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
+{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
+{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
+{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
+{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
+{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
+{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
+{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
+{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
+{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
+{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
+{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
+{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
+{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
+{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
+{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
+{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
+{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
+{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
+{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
+{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
+{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
+{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
+{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
+{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
+{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
+{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
+{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
+{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
+{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
+#elif N == 128
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+#if N == 64
+{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
+{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
+{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
+{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
+{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
+{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
+{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
+{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
+{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
+{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
+{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
+{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
+{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
+{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
+{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
+{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
+{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
+{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
+{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
+{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
+{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
+{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
+{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
+{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
+{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
+{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
+{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
+{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
+{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
+{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
+{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
+{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
+{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
+{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
+{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
+{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
+{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
+{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
+{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
+{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
+{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
+{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
+{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
+{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
+{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
+{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
+{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
+{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
+{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
+{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
+{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
+{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
+{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
+{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
+{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
+{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
+{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
+{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
+{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
+{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
+{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
+{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
+{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
+{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
+#elif N == 128
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+#endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
diff --git a/pl/math/logf.c b/pl/math/logf.c
new file mode 100644
index 0000000..17a74ed
--- /dev/null
+++ b/pl/math/logf.c
@@ -0,0 +1,75 @@
+/*
+ * Single-precision log function.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+float /* Single-precision natural logarithm, evaluated in double_t from a table lookup plus a polynomial. */
+optr_aor_log_f32 (float x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
+
+ ix = asuint (x); /* Raw bit pattern of x; all range checks below operate on it. */
+#if WANT_ROUNDING
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (unlikely (ix == 0x3f800000))
+ return 0;
+#endif
+ if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+ {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0) /* +0 or -0 (doubling discards the sign bit): result is -inf. */
+ return __math_divzerof (1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000) /* Negative, or exponent field all ones (NaN). */
+ return __math_invalidf (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint (x * 0x1p23f);
+ ix -= 23 << 23; /* Undo the 2^23 scaling in the exponent field. */
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t) tmp >> 23; /* arithmetic shift */
+ iz = ix - (tmp & 0xff800000); /* Unsigned mask of bits 23..31; writing it as 0x1ff << 23 overflows int. */
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t) asfloat (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+ r = z * invc - 1;
+ y0 = logc + (double_t) k * Ln2;
+
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r); /* Adds the first-order term r and the table/exponent part y0. */
+ return eval_as_float (y);
+}
diff --git a/pl/math/logf_data.c b/pl/math/logf_data.c
new file mode 100644
index 0000000..97d9eb8
--- /dev/null
+++ b/pl/math/logf_data.c
@@ -0,0 +1,36 @@
+/*
+ * Data definition for logf and log10f.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct logf_data __logf_data = {
+ .tab = /* 16 {invc, logc} pairs; logf.c indexes them by subinterval i (r = z * invc - 1, y0 uses logc). */
+ {
+ {0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2},
+ {0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2},
+ {0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2},
+ {0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3},
+ {0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3},
+ {0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3},
+ {0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4},
+ {0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4},
+ {0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5},
+ {0x1p+0, 0x0p+0}, /* Entry with invc == 1 and logc == 0. */
+ {0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5},
+ {0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4},
+ {0x1.b2036576afce6p-1, 0x1.526e57720db08p-3},
+ {0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3},
+ {0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2},
+ {0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2},
+ },
+ .ln2 = 0x1.62e42fefa39efp-1, /* ~0.693147; multiplied by the exponent k in logf.c. */
+ .invln10 = 0x1.bcb7b1526e50ep-2, /* ~0.434294; not referenced by logf.c, presumably for log10f (see file header). */
+ .poly = { /* A[0..2] in logf.c: coefficients of r^4, r^3 and r^2 respectively. */
+ -0x1.00ea348b88334p-2,
+ 0x1.5575b0be00b6ap-2,
+ -0x1.ffffef20a4123p-2,
+ }};
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
new file mode 100644
index 0000000..dccb3ce
--- /dev/null
+++ b/pl/math/math_config.h
@@ -0,0 +1,572 @@
+/*
+ * Configuration for math routines.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATH_CONFIG_H
+#define _MATH_CONFIG_H
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef WANT_ROUNDING
+/* If defined to 1, return correct results for special cases in non-nearest
+ rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
+ This may be set to 0 if there is no fenv support or if math functions only
+ get called in round to nearest mode. */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* If defined to 1, set errno in math functions according to ISO C. Many math
+ libraries do not set errno, so this is 0 by default. It may need to be
+ set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
+# define WANT_ERRNO 0
+#endif
+#ifndef WANT_SIMD_EXCEPT
+/* If defined to 1, trigger fp exceptions in vector routines, consistently with
+ behaviour expected from the corresponding scalar routine. */
+#define WANT_SIMD_EXCEPT 0
+#endif
+
+/* Compiler can inline round as a single instruction. */
+#ifndef HAVE_FAST_ROUND
+# if __aarch64__
+# define HAVE_FAST_ROUND 1
+# else
+# define HAVE_FAST_ROUND 0
+# endif
+#endif
+
+/* Compiler can inline lround, but not (long)round(x). */
+#ifndef HAVE_FAST_LROUND
+# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
+# define HAVE_FAST_LROUND 1
+# else
+# define HAVE_FAST_LROUND 0
+# endif
+#endif
+
+/* Compiler can inline fma as a single instruction. */
+#ifndef HAVE_FAST_FMA
+# if defined FP_FAST_FMA || __aarch64__
+# define HAVE_FAST_FMA 1
+# else
+# define HAVE_FAST_FMA 0
+# endif
+#endif
+
+/* Provide *_finite symbols and some of the glibc hidden symbols
+ so libmathlib can be used with binaries compiled against glibc
+ to interpose math functions with both static and dynamic linking. */
+#ifndef USE_GLIBC_ABI
+# if __GNUC__
+# define USE_GLIBC_ABI 1
+# else
+# define USE_GLIBC_ABI 0
+# endif
+#endif
+
+/* Optionally used extensions; every macro has an empty or plain fallback for non-GNU compilers, except the alias helpers. */
+#ifdef __GNUC__
+# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
+# define NOINLINE __attribute__ ((noinline))
+# define UNUSED __attribute__ ((unused))
+# define likely(x) __builtin_expect (!!(x), 1)
+# define unlikely(x) __builtin_expect (!!(x), 0) /* !! keeps wide operands from being truncated to long. */
+# if __GNUC__ >= 9
+# define attribute_copy(f) __attribute__ ((copy (f)))
+# else
+# define attribute_copy(f)
+# endif
+# define strong_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+ attribute_copy (f);
+#else
+# define HIDDEN
+# define NOINLINE
+# define UNUSED
+# define likely(x) (!!(x)) /* Same 0/1 value as the __builtin_expect form, without the hint. */
+# define unlikely(x) (!!(x))
+#endif
+
+#if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+ the semantics documented below. */
+# define TOINT_INTRINSICS 1
+
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static inline double_t
+roundtoint (double_t x)
+{
+ return round (x); /* round() sends halfway cases away from zero, independent of the rounding mode. */
+}
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+ consistently with roundtoint. If the result is not representable in an
+ int32_t then the semantics is unspecified. */
+static inline int32_t
+converttoint (double_t x)
+{
+# if HAVE_FAST_LROUND
+ return lround (x); /* Same tie rule as round(), so it agrees with roundtoint. */
+# else
+ return (long) round (x); /* Round first, then convert the already-integral value. */
+# endif
+}
+#endif /* HAVE_FAST_ROUND */
+
+static inline uint32_t
+asuint (float f) /* Return the bit pattern of f as a 32-bit integer (no numeric conversion). */
+{
+ union
+ {
+ float f;
+ uint32_t i;
+ } u = {f}; /* Initializes the first member (the float); the integer member aliases it. */
+ return u.i;
+}
+
+static inline float
+asfloat (uint32_t i) /* Inverse of asuint: reinterpret the 32-bit pattern i as a float. */
+{
+ union
+ {
+ uint32_t i;
+ float f;
+ } u = {i}; /* Initializes the first member (the integer); the float member aliases it. */
+ return u.f;
+}
+
+static inline uint64_t
+asuint64 (double f) /* Return the bit pattern of f as a 64-bit integer (no numeric conversion). */
+{
+ union
+ {
+ double f;
+ uint64_t i;
+ } u = {f}; /* Initializes the first member (the double); the integer member aliases it. */
+ return u.i;
+}
+
+static inline double
+asdouble (uint64_t i) /* Inverse of asuint64: reinterpret the 64-bit pattern i as a double. */
+{
+ union
+ {
+ uint64_t i;
+ double f;
+ } u = {i}; /* Initializes the first member (the integer); the double member aliases it. */
+ return u.f;
+}
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1 /* 1: mantissa bit 22 (bit 51 for double) clear marks a signaling NaN. */
+#endif
+static inline int
+issignalingf_inline (float x) /* Nonzero if x is a signaling NaN under the selected convention. */
+{
+ uint32_t ix = asuint (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7fc00000) == 0x7fc00000; /* Other convention: exponent all ones and bit 22 set. */
+ return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; /* Flip bit 22, drop the sign: true only for exponent all ones, bit 22 clear, payload nonzero. */
+}
+
+static inline int
+issignaling_inline (double x) /* Double-precision counterpart of issignalingf_inline. */
+{
+ uint64_t ix = asuint64 (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; /* Other convention: exponent all ones and bit 51 set. */
+ return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; /* Flip bit 51, drop the sign: true only for exponent all ones, bit 51 clear, payload nonzero. */
+}
+
+#if __aarch64__ && __GNUC__
+/* Prevent the optimization of a floating-point expression. */
+static inline float
+opt_barrier_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x)); /* Empty asm; the read-write operand makes x opaque to the optimizer. */
+ return x;
+}
+static inline double
+opt_barrier_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x)); /* Same barrier for double. */
+ return x;
+}
+/* Force the evaluation of a floating-point expression for its side-effect. */
+static inline void
+force_eval_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x)); /* The value is consumed by the asm, so it must be computed. */
+}
+static inline void
+force_eval_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x)); /* Same for double. */
+}
+#else /* Portable fallback: route the value through a volatile object instead of inline asm. */
+static inline float
+opt_barrier_float (float x)
+{
+ volatile float y = x;
+ return y; /* Volatile read: the compiler cannot assume it equals x. */
+}
+static inline double
+opt_barrier_double (double x)
+{
+ volatile double y = x;
+ return y; /* Volatile read: the compiler cannot assume it equals x. */
+}
+static inline void
+force_eval_float (float x)
+{
+ volatile float y UNUSED = x; /* Volatile store keeps the computation of x alive. */
+}
+static inline void
+force_eval_double (double x)
+{
+ volatile double y UNUSED = x; /* Volatile store keeps the computation of x alive. */
+}
+#endif
+
+/* Evaluate an expression as the specified type. Normally a type
+ cast should be enough, but compilers implement non-standard
+ excess-precision handling, so when FLT_EVAL_METHOD != 0 then
+ these functions may need to be customized. */
+static inline float
+eval_as_float (float x) /* The argument is converted to float at the call; the body is an identity. */
+{
+ return x;
+}
+static inline double
+eval_as_double (double x) /* The argument is converted to double at the call; the body is an identity. */
+{
+ return x;
+}
+
+/* Error handling tail calls for special cases, with a sign argument.
+ The sign of the return value is set if the argument is non-zero. */
+
+/* The result overflows. */
+HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero. */
+HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows. */
+HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero. */
+HIDDEN double __math_divzero (uint32_t);
+
+/* Error handling using input checking. */
+
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN double __math_invalid (double);
+
+/* Error handling using output checking, only for errno setting. */
+
+/* Check if the result overflowed to infinity. */
+HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0. */
+HIDDEN double __math_check_uflow (double);
+
+/* Check if the result overflowed to infinity. Calls the out-of-line errno helper only when WANT_ERRNO is nonzero; otherwise returns x unchanged. */
+static inline double
+check_oflow (double x)
+{
+ return WANT_ERRNO ? __math_check_oflow (x) : x;
+}
+
+/* Check if the result underflowed to 0. Calls the out-of-line errno helper only when WANT_ERRNO is nonzero; otherwise returns x unchanged. */
+static inline double
+check_uflow (double x)
+{
+ return WANT_ERRNO ? __math_check_uflow (x) : x;
+}
+
+/* Check if the result overflowed to infinity. */
+HIDDEN float __math_check_oflowf (float);
+/* Check if the result underflowed to 0. */
+HIDDEN float __math_check_uflowf (float);
+
+/* Check if the result overflowed to infinity (float variant). Calls the errno helper only when WANT_ERRNO is nonzero; otherwise returns x unchanged. */
+static inline float
+check_oflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_oflowf (x) : x;
+}
+
+/* Check if the result underflowed to 0 (float variant). Calls the errno helper only when WANT_ERRNO is nonzero; otherwise returns x unchanged. */
+static inline float
+check_uflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_uflowf (x) : x;
+}
+
+extern const struct erff_data
+{
+ float erff_poly_A[6];
+ float erff_poly_B[7];
+} __erff_data HIDDEN;
+
+/* Data for logf and log10f. */
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOGF_TABLE_BITS];
+ double ln2;
+ double invln10;
+ double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
+} __logf_data HIDDEN;
+
+/* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */
+#define LOG10_TABLE_BITS 7
+#define LOG10_POLY_ORDER 6
+#define LOG10_POLY1_ORDER 12
+extern const struct log10_data
+{
+ double ln2hi;
+ double ln2lo;
+ double invln10;
+ double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */
+ double poly1[LOG10_POLY1_ORDER - 1];
+ struct {double invc, logc;} tab[1 << LOG10_TABLE_BITS];
+#if !HAVE_FAST_FMA
+ struct {double chi, clo;} tab2[1 << LOG10_TABLE_BITS];
+#endif
+} __log10_data HIDDEN;
+
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+/* Use polynomial that is optimized for a wider input range. This may be
+ needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
+#define EXP_POLY_WIDE 0
+/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
+ needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+#define EXP2_POLY_WIDE 0
+extern const struct exp_data
+{
+ double invln2N;
+ double shift;
+ double negln2hiN;
+ double negln2loN;
+ double poly[4]; /* Last four coefficients. */
+ double exp2_shift;
+ double exp2_poly[EXP2_POLY_ORDER];
+ uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+} __exp_data HIDDEN;
+
+#define ERFC_NUM_INTERVALS 20
+#define ERFC_POLY_ORDER 12
+extern const struct erfc_data
+{
+ double interval_bounds[ERFC_NUM_INTERVALS + 1];
+ double poly[ERFC_NUM_INTERVALS][ERFC_POLY_ORDER + 1];
+} __erfc_data HIDDEN;
+extern const struct v_erfc_data
+{
+ double interval_bounds[ERFC_NUM_INTERVALS + 1];
+ double poly[ERFC_NUM_INTERVALS + 1][ERFC_POLY_ORDER + 1];
+} __v_erfc_data HIDDEN;
+
+#define ERFCF_POLY_NCOEFFS 16
+extern const struct erfcf_poly_data
+{
+ double poly[4][ERFCF_POLY_NCOEFFS];
+} __erfcf_poly_data HIDDEN;
+
+#define V_EXP_TAIL_TABLE_BITS 8
+extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
+
+#define V_ERF_NINTS 49
+#define V_ERF_NCOEFFS 10
+extern const struct v_erf_data
+{
+ double shifts[V_ERF_NINTS];
+ double coeffs[V_ERF_NCOEFFS][V_ERF_NINTS];
+} __v_erf_data HIDDEN;
+
+#define V_ERFF_NCOEFFS 7
+extern const struct v_erff_data
+{
+ float coeffs[V_ERFF_NCOEFFS][2];
+} __v_erff_data HIDDEN;
+
+#define ATAN_POLY_NCOEFFS 20
+extern const struct atan_poly_data
+{
+ double poly[ATAN_POLY_NCOEFFS];
+} __atan_poly_data HIDDEN;
+
+#define ATANF_POLY_NCOEFFS 8
+extern const struct atanf_poly_data
+{
+ float poly[ATANF_POLY_NCOEFFS];
+} __atanf_poly_data HIDDEN;
+
+#define ASINHF_NCOEFFS 8
+extern const struct asinhf_data
+{
+ float coeffs[ASINHF_NCOEFFS];
+} __asinhf_data HIDDEN;
+
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern const struct log_data
+{
+ double ln2hi;
+ double ln2lo;
+ double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
+ double poly1[LOG_POLY1_ORDER - 1];
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOG_TABLE_BITS];
+#if !HAVE_FAST_FMA
+ struct
+ {
+ double chi, clo;
+ } tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data HIDDEN;
+
+#define ASINH_NCOEFFS 18
+extern const struct asinh_data
+{
+ double poly[ASINH_NCOEFFS];
+} __asinh_data HIDDEN;
+
+#define LOG1P_NCOEFFS 19
+extern const struct log1p_data
+{
+ double coeffs[LOG1P_NCOEFFS];
+} __log1p_data HIDDEN;
+
+#define LOG1PF_2U5
+#define V_LOG1PF_2U5
+#define LOG1PF_NCOEFFS 9
+extern const struct log1pf_data
+{
+ float coeffs[LOG1PF_NCOEFFS];
+} __log1pf_data HIDDEN;
+
+#define TANF_P_POLY_NCOEFFS 6
+/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */
+#define TANF_Q_POLY_NCOEFFS 4
+extern const struct tanf_poly_data
+{
+ float poly_tan[TANF_P_POLY_NCOEFFS];
+ float poly_cotan[TANF_Q_POLY_NCOEFFS];
+} __tanf_poly_data HIDDEN;
+
+#define V_LOG2F_POLY_NCOEFFS 9
+extern const struct v_log2f_data
+{
+ float poly[V_LOG2F_POLY_NCOEFFS];
+} __v_log2f_data HIDDEN;
+
+#define V_LOG2_TABLE_BITS 7
+#define V_LOG2_POLY_ORDER 6
+extern const struct v_log2_data
+{
+ double poly[V_LOG2_POLY_ORDER - 1];
+ struct
+ {
+ double invc, log2c;
+ } tab[1 << V_LOG2_TABLE_BITS];
+} __v_log2_data HIDDEN;
+
+#define V_SINF_NCOEFFS 4
+extern const struct sv_sinf_data
+{
+ float coeffs[V_SINF_NCOEFFS];
+} __sv_sinf_data HIDDEN;
+
+#define V_LOG10_TABLE_BITS 7
+#define V_LOG10_POLY_ORDER 6
+extern const struct v_log10_data
+{
+ struct
+ {
+ double invc, log10c;
+ } tab[1 << V_LOG10_TABLE_BITS];
+ double poly[V_LOG10_POLY_ORDER - 1];
+ double invln10, log10_2;
+} __v_log10_data HIDDEN;
+
+#define V_LOG10F_POLY_ORDER 9
+extern const float __v_log10f_poly[V_LOG10F_POLY_ORDER - 1] HIDDEN;
+
+#define SV_LOGF_POLY_ORDER 8
+extern const float __sv_logf_poly[SV_LOGF_POLY_ORDER - 1] HIDDEN;
+
+#define SV_LOG_POLY_ORDER 6
+#define SV_LOG_TABLE_BITS 7
+extern const struct sv_log_data
+{
+ double invc[1 << SV_LOG_TABLE_BITS];
+ double logc[1 << SV_LOG_TABLE_BITS];
+ double poly[SV_LOG_POLY_ORDER - 1];
+} __sv_log_data HIDDEN;
+
+#ifndef SV_EXPF_USE_FEXPA
+#define SV_EXPF_USE_FEXPA 0
+#endif
+#define SV_EXPF_POLY_ORDER 6
+extern const float __sv_expf_poly[SV_EXPF_POLY_ORDER - 1] HIDDEN;
+
+#define EXPM1F_POLY_ORDER 5
+extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
+
+#define EXPF_TABLE_BITS 5
+#define EXPF_POLY_ORDER 3
+extern const struct expf_data
+{
+ uint64_t tab[1 << EXPF_TABLE_BITS];
+ double invln2_scaled;
+ double poly_scaled[EXPF_POLY_ORDER];
+} __expf_data HIDDEN;
+
+#define EXPM1_POLY_ORDER 11
+extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
+
+extern const struct cbrtf_data
+{
+ float poly[4];
+ float table[5];
+} __cbrtf_data HIDDEN;
+
+extern const struct cbrt_data
+{
+ double poly[4];
+ double table[5];
+} __cbrt_data HIDDEN;
+
+extern const struct v_tan_data
+{
+ double neg_half_pi_hi, neg_half_pi_lo;
+ double poly[9];
+} __v_tan_data HIDDEN;
+#endif
diff --git a/pl/math/math_err.c b/pl/math/math_err.c
new file mode 100644
index 0000000..d246a89
--- /dev/null
+++ b/pl/math/math_err.c
@@ -0,0 +1,78 @@
+/*
+ * Double-precision math error handling.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+#include <errno.h>
+/* NOINLINE reduces code size and avoids making math functions non-leaf
+ when the error handling is inlined. Stores e in errno and returns y. */
+NOINLINE static double
+with_errno (double y, int e)
+{
+ errno = e;
+ return y;
+}
+#else /* errno reporting disabled: the macro discards e, so ERANGE/EDOM need not be declared. */
+#define with_errno(x, e) (x)
+#endif
+
+/* NOINLINE reduces code size. Returns (sign ? -y : y) * y, computed at run time, with errno set to ERANGE when WANT_ERRNO. */
+NOINLINE static double
+xflow (uint32_t sign, double y)
+{
+ y = eval_as_double (opt_barrier_double (sign ? -y : y) * y); /* The barrier keeps the product from being folded at compile time. */
+ return with_errno (y, ERANGE);
+}
+
+HIDDEN double
+__math_uflow (uint32_t sign) /* Underflowed result; negative when sign is nonzero. */
+{
+ return xflow (sign, 0x1p-767); /* (2^-767)^2 = 2^-1534, far below the smallest double subnormal 2^-1074. */
+}
+
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+ is valid even if the result is non-zero, but in the subnormal range. */
+HIDDEN double
+__math_may_uflow (uint32_t sign)
+{
+ return xflow (sign, 0x1.8p-538); /* (1.5 * 2^-538)^2 = 0.5625 * 2^-1074: just over half the smallest subnormal. */
+}
+
+HIDDEN double
+__math_oflow (uint32_t sign) /* Overflowed result; negative when sign is nonzero. */
+{
+ return xflow (sign, 0x1p769); /* (2^769)^2 = 2^1538 exceeds the double range (largest exponent 1023). */
+}
+
+HIDDEN double
+__math_divzero (uint32_t sign) /* Division-by-zero result: -inf when sign is nonzero, +inf otherwise. */
+{
+ double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0; /* Barrier forces the division to happen at run time. */
+ return with_errno (y, ERANGE);
+}
+
+HIDDEN double
+__math_invalid (double x) /* Invalid-input result: always a NaN. */
+{
+ double y = (x - x) / (x - x); /* 0/0 for finite x, (inf - inf) for infinite x: NaN either way. */
+ return isnan (x) ? y : with_errno (y, EDOM); /* errno is left alone when the input was already a NaN. */
+}
+
+/* Check result and set errno if necessary. */
+
+HIDDEN double
+__math_check_uflow (double y) /* Returns y; reports ERANGE through with_errno when y is zero. */
+{
+ return y == 0.0 ? with_errno (y, ERANGE) : y; /* Both +0 and -0 compare equal to 0.0. */
+}
+
+HIDDEN double
+__math_check_oflow (double y) /* Returns y; reports ERANGE through with_errno when y is infinite. */
+{
+ return isinf (y) ? with_errno (y, ERANGE) : y;
+}
diff --git a/pl/math/math_errf.c b/pl/math/math_errf.c
new file mode 100644
index 0000000..96271ff
--- /dev/null
+++ b/pl/math/math_errf.c
@@ -0,0 +1,78 @@
+/*
+ * Single-precision math error handling.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+#include <errno.h>
+/* NOINLINE reduces code size and avoids making math functions non-leaf
+ when the error handling is inlined. Stores e in errno and returns y. */
+NOINLINE static float
+with_errnof (float y, int e)
+{
+ errno = e;
+ return y;
+}
+#else /* errno reporting disabled: the macro discards e, so ERANGE/EDOM need not be declared. */
+#define with_errnof(x, e) (x)
+#endif
+
+/* NOINLINE reduces code size. Returns (sign ? -y : y) * y, computed at run time, with errno set to ERANGE when WANT_ERRNO. */
+NOINLINE static float
+xflowf (uint32_t sign, float y)
+{
+ y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); /* The barrier keeps the product from being folded at compile time. */
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_uflowf (uint32_t sign) /* Underflowed result; negative when sign is nonzero. */
+{
+ return xflowf (sign, 0x1p-95f); /* (2^-95)^2 = 2^-190, far below the smallest float subnormal 2^-149. */
+}
+
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+ is valid even if the result is non-zero, but in the subnormal range. */
+HIDDEN float
+__math_may_uflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1.4p-75f); /* (1.25 * 2^-75)^2 = 0.78125 * 2^-149: between half and one smallest subnormal. */
+}
+
+HIDDEN float
+__math_oflowf (uint32_t sign) /* Overflowed result; negative when sign is nonzero. */
+{
+ return xflowf (sign, 0x1p97f); /* (2^97)^2 = 2^194 exceeds the float range (largest exponent 127). */
+}
+
+HIDDEN float
+__math_divzerof (uint32_t sign) /* Division-by-zero result: -inf when sign is nonzero, +inf otherwise. */
+{
+ float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f; /* Barrier forces the division to happen at run time. */
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_invalidf (float x) /* Invalid-input result: always a NaN. */
+{
+ float y = (x - x) / (x - x); /* 0/0 for finite x, (inf - inf) for infinite x: NaN either way. */
+ return isnan (x) ? y : with_errnof (y, EDOM); /* errno is left alone when the input was already a NaN. */
+}
+
+/* Check result and set errno if necessary. */
+
+HIDDEN float
+__math_check_uflowf (float y) /* Returns y; reports ERANGE through with_errnof when y is zero. */
+{
+ return y == 0.0f ? with_errnof (y, ERANGE) : y; /* Both +0 and -0 compare equal to 0.0f. */
+}
+
+HIDDEN float
+__math_check_oflowf (float y) /* Returns y; reports ERANGE through with_errnof when y is infinite. */
+{
+ return isinf (y) ? with_errnof (y, ERANGE) : y;
+}
diff --git a/pl/math/pairwise_horner.h b/pl/math/pairwise_horner.h
new file mode 100644
index 0000000..6ad98dc
--- /dev/null
+++ b/pl/math/pairwise_horner.h
@@ -0,0 +1,14 @@
+/*
+ * Helper macros for double-precision pairwise Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if V_SUPPORTED /* Pick the fused multiply-add that the PW_HORNER_* macros expand to. */
+#define FMA v_fma_f64
+#else
+#define FMA fma /* Scalar double-precision fma from <math.h>. */
+#endif
+
+#include "pairwise_horner_wrap.h" /* Must come after FMA is defined: the wrapper macros expand to FMA calls. */
diff --git a/pl/math/pairwise_horner_wrap.h b/pl/math/pairwise_horner_wrap.h
new file mode 100644
index 0000000..e56f059
--- /dev/null
+++ b/pl/math/pairwise_horner_wrap.h
@@ -0,0 +1,48 @@
+/*
+ * Helper macros for pairwise Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+// clang-format off
+#define PW_HORNER_1_(x, c, i) FMA(x, c(i + 1), c(i)) /* c(i) + x * c(i + 1), assuming FMA (a, b, d) computes a * b + d as fma does; c is a macro or function yielding coefficient i. */
+#define PW_HORNER_3_(x, x2, c, i) FMA(x2, PW_HORNER_1_ (x, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_5_(x, x2, c, i) FMA(x2, PW_HORNER_3_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_7_(x, x2, c, i) FMA(x2, PW_HORNER_5_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_9_(x, x2, c, i) FMA(x2, PW_HORNER_7_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_11_(x, x2, c, i) FMA(x2, PW_HORNER_9_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_13_(x, x2, c, i) FMA(x2, PW_HORNER_11_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_15_(x, x2, c, i) FMA(x2, PW_HORNER_13_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_17_(x, x2, c, i) FMA(x2, PW_HORNER_15_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+
+#define PAIRWISE_HORNER_1(x, c) PW_HORNER_1_ (x, c, 0)
+#define PAIRWISE_HORNER_3(x, x2, c) PW_HORNER_3_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_5(x, x2, c) PW_HORNER_5_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_7(x, x2, c) PW_HORNER_7_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_9(x, x2, c) PW_HORNER_9_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_11(x, x2, c) PW_HORNER_11_(x, x2, c, 0)
+#define PAIRWISE_HORNER_13(x, x2, c) PW_HORNER_13_(x, x2, c, 0)
+#define PAIRWISE_HORNER_15(x, x2, c) PW_HORNER_15_(x, x2, c, 0)
+#define PAIRWISE_HORNER_17(x, x2, c) PW_HORNER_17_(x, x2, c, 0)
+
+#define PW_HORNER_2_(x, x2, c, i) FMA(x2, c(i + 2), PW_HORNER_1_(x, c, i)) /* c(i) + x * c(i + 1) + x2 * c(i + 2); the caller is expected to pass x2 = x * x. */
+#define PW_HORNER_4_(x, x2, c, i) FMA(x2, PW_HORNER_2_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_6_(x, x2, c, i) FMA(x2, PW_HORNER_4_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_8_(x, x2, c, i) FMA(x2, PW_HORNER_6_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_10_(x, x2, c, i) FMA(x2, PW_HORNER_8_ (x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_12_(x, x2, c, i) FMA(x2, PW_HORNER_10_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_14_(x, x2, c, i) FMA(x2, PW_HORNER_12_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_16_(x, x2, c, i) FMA(x2, PW_HORNER_14_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+#define PW_HORNER_18_(x, x2, c, i) FMA(x2, PW_HORNER_16_(x, x2, c, i + 2), PW_HORNER_1_(x, c, i))
+
+#define PAIRWISE_HORNER_2(x, x2, c) PW_HORNER_2_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_4(x, x2, c) PW_HORNER_4_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_6(x, x2, c) PW_HORNER_6_ (x, x2, c, 0)
+#define PAIRWISE_HORNER_8(x, x2, c) PW_HORNER_8_(x, x2, c, 0)
+#define PAIRWISE_HORNER_10(x, x2, c) PW_HORNER_10_(x, x2, c, 0)
+#define PAIRWISE_HORNER_12(x, x2, c) PW_HORNER_12_(x, x2, c, 0)
+#define PAIRWISE_HORNER_14(x, x2, c) PW_HORNER_14_(x, x2, c, 0)
+#define PAIRWISE_HORNER_16(x, x2, c) PW_HORNER_16_(x, x2, c, 0)
+#define PAIRWISE_HORNER_18(x, x2, c) PW_HORNER_18_(x, x2, c, 0)
+// clang-format on
diff --git a/pl/math/pairwise_hornerf.h b/pl/math/pairwise_hornerf.h
new file mode 100644
index 0000000..784750c
--- /dev/null
+++ b/pl/math/pairwise_hornerf.h
@@ -0,0 +1,14 @@
+/*
+ * Helper macros for single-precision pairwise Horner polynomial evaluation.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if V_SUPPORTED /* Pick the fused multiply-add that the PW_HORNER_* macros expand to. */
+#define FMA v_fma_f32
+#else
+#define FMA fmaf /* Scalar single-precision fmaf from <math.h>. */
+#endif
+
+#include "pairwise_horner_wrap.h" /* Must come after FMA is defined: the wrapper macros expand to FMA calls. */
diff --git a/pl/math/pl_sig.h b/pl/math/pl_sig.h
new file mode 100644
index 0000000..686d24f
--- /dev/null
+++ b/pl/math/pl_sig.h
@@ -0,0 +1,43 @@
+/*
+ * PL macros for emitting various ulp/bench entries based on function signature
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define PL_DECL_SF1(fun) float fun##f (float);
+#define PL_DECL_SF2(fun) float fun##f (float, float);
+#define PL_DECL_SD1(fun) double fun (double);
+#define PL_DECL_SD2(fun) double fun (double, double);
+
+#if V_SUPPORTED
+#define PL_DECL_VF1(fun) VPCS_ATTR v_f32_t V_NAME (fun##f) (v_f32_t);
+#define PL_DECL_VF2(fun) VPCS_ATTR v_f32_t V_NAME (fun##f) (v_f32_t, v_f32_t);
+#define PL_DECL_VD1(fun) VPCS_ATTR v_f64_t V_NAME (fun) (v_f64_t);
+#define PL_DECL_VD2(fun) VPCS_ATTR v_f64_t V_NAME (fun) (v_f64_t, v_f64_t);
+#else
+#define PL_DECL_VF1(fun)
+#define PL_DECL_VF2(fun)
+#define PL_DECL_VD1(fun)
+#define PL_DECL_VD2(fun)
+#endif
+
+#if SV_SUPPORTED
+#define PL_DECL_SVF1(fun) sv_f32_t __sv_##fun##f_x (sv_f32_t, svbool_t);
+#define PL_DECL_SVF2(fun) \
+ sv_f32_t __sv_##fun##f_x (sv_f32_t, sv_f32_t, svbool_t);
+#define PL_DECL_SVD1(fun) sv_f64_t __sv_##fun##_x (sv_f64_t, svbool_t);
+#define PL_DECL_SVD2(fun) \
+ sv_f64_t __sv_##fun##_x (sv_f64_t, sv_f64_t, svbool_t);
+#else
+#define PL_DECL_SVF1(fun)
+#define PL_DECL_SVF2(fun)
+#define PL_DECL_SVD1(fun)
+#define PL_DECL_SVD2(fun)
+#endif
+
+/* For building the routines, emit function prototype from PL_SIG. This
+ ensures that the correct signature has been chosen (wrong one will be a
+ compile error). PL_SIG is defined differently by various components of the
+ build system to emit entries in the wrappers and entries for mathbench and
+ ulp. */
+#define PL_SIG(v, t, a, f, ...) PL_DECL_##v##t##a (f)
diff --git a/pl/math/s_acosh_3u5.c b/pl/math/s_acosh_3u5.c
new file mode 100644
index 0000000..f62cbd6
--- /dev/null
+++ b/pl/math/s_acosh_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_acosh_3u5.c"
diff --git a/pl/math/s_acoshf_3u1.c b/pl/math/s_acoshf_3u1.c
new file mode 100644
index 0000000..3740666
--- /dev/null
+++ b/pl/math/s_acoshf_3u1.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_acoshf_3u1.c"
diff --git a/pl/math/s_asinh_3u5.c b/pl/math/s_asinh_3u5.c
new file mode 100644
index 0000000..ab8fbd9
--- /dev/null
+++ b/pl/math/s_asinh_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_asinh_3u5.c"
diff --git a/pl/math/s_asinhf_2u7.c b/pl/math/s_asinhf_2u7.c
new file mode 100644
index 0000000..13e1a5f
--- /dev/null
+++ b/pl/math/s_asinhf_2u7.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_asinhf_2u7.c"
diff --git a/pl/math/s_atan2_3u.c b/pl/math/s_atan2_3u.c
new file mode 100644
index 0000000..4603e5f
--- /dev/null
+++ b/pl/math/s_atan2_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atan2_3u.c"
diff --git a/pl/math/s_atan2f_3u.c b/pl/math/s_atan2f_3u.c
new file mode 100644
index 0000000..894d843
--- /dev/null
+++ b/pl/math/s_atan2f_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atan2f_3u.c"
diff --git a/pl/math/s_atan_2u5.c b/pl/math/s_atan_2u5.c
new file mode 100644
index 0000000..4b61bc4
--- /dev/null
+++ b/pl/math/s_atan_2u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atan_2u5.c"
diff --git a/pl/math/s_atanf_3u.c b/pl/math/s_atanf_3u.c
new file mode 100644
index 0000000..6b65719
--- /dev/null
+++ b/pl/math/s_atanf_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atanf_3u.c"
diff --git a/pl/math/s_atanh_3u5.c b/pl/math/s_atanh_3u5.c
new file mode 100644
index 0000000..f6a5f75
--- /dev/null
+++ b/pl/math/s_atanh_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atanh_3u5.c"
diff --git a/pl/math/s_atanhf_3u1.c b/pl/math/s_atanhf_3u1.c
new file mode 100644
index 0000000..e7e5c61
--- /dev/null
+++ b/pl/math/s_atanhf_3u1.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_atanhf_3u1.c"
diff --git a/pl/math/s_cbrt_2u.c b/pl/math/s_cbrt_2u.c
new file mode 100644
index 0000000..435e74a
--- /dev/null
+++ b/pl/math/s_cbrt_2u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_cbrt_2u.c"
diff --git a/pl/math/s_cbrtf_1u5.c b/pl/math/s_cbrtf_1u5.c
new file mode 100644
index 0000000..5c79370
--- /dev/null
+++ b/pl/math/s_cbrtf_1u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_cbrtf_1u5.c"
diff --git a/pl/math/s_cosh_2u.c b/pl/math/s_cosh_2u.c
new file mode 100644
index 0000000..cdf352c
--- /dev/null
+++ b/pl/math/s_cosh_2u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_cosh_2u.c"
diff --git a/pl/math/s_coshf_2u4.c b/pl/math/s_coshf_2u4.c
new file mode 100644
index 0000000..8f7d5da
--- /dev/null
+++ b/pl/math/s_coshf_2u4.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_coshf_2u4.c"
diff --git a/pl/math/s_erf_2u.c b/pl/math/s_erf_2u.c
new file mode 100644
index 0000000..839535c
--- /dev/null
+++ b/pl/math/s_erf_2u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_erf_2u.c"
diff --git a/pl/math/s_erfc_4u.c b/pl/math/s_erfc_4u.c
new file mode 100644
index 0000000..bf9e3e6
--- /dev/null
+++ b/pl/math/s_erfc_4u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_erfc_4u.c"
diff --git a/pl/math/s_erfcf_1u.c b/pl/math/s_erfcf_1u.c
new file mode 100644
index 0000000..024d224
--- /dev/null
+++ b/pl/math/s_erfcf_1u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_erfcf_1u.c"
diff --git a/pl/math/s_erff_1u5.c b/pl/math/s_erff_1u5.c
new file mode 100644
index 0000000..a5b9bf9
--- /dev/null
+++ b/pl/math/s_erff_1u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_erff_1u5.c"
diff --git a/pl/math/s_exp_tail.c b/pl/math/s_exp_tail.c
new file mode 100644
index 0000000..20b1b41
--- /dev/null
+++ b/pl/math/s_exp_tail.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_exp_tail.c"
diff --git a/pl/math/s_expf.c b/pl/math/s_expf.c
new file mode 100644
index 0000000..557a2e3
--- /dev/null
+++ b/pl/math/s_expf.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_expf.c"
diff --git a/pl/math/s_expm1_2u5.c b/pl/math/s_expm1_2u5.c
new file mode 100644
index 0000000..da2d6e7
--- /dev/null
+++ b/pl/math/s_expm1_2u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_expm1_2u5.c"
diff --git a/pl/math/s_expm1f_1u6.c b/pl/math/s_expm1f_1u6.c
new file mode 100644
index 0000000..eea8089
--- /dev/null
+++ b/pl/math/s_expm1f_1u6.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_expm1f_1u6.c"
diff --git a/pl/math/s_log10_2u5.c b/pl/math/s_log10_2u5.c
new file mode 100644
index 0000000..2480e5a
--- /dev/null
+++ b/pl/math/s_log10_2u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log10_2u5.c"
diff --git a/pl/math/s_log10f_3u5.c b/pl/math/s_log10f_3u5.c
new file mode 100644
index 0000000..173e0fd
--- /dev/null
+++ b/pl/math/s_log10f_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log10f_3u5.c"
diff --git a/pl/math/s_log1p_2u5.c b/pl/math/s_log1p_2u5.c
new file mode 100644
index 0000000..20b395a
--- /dev/null
+++ b/pl/math/s_log1p_2u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log1p_2u5.c"
diff --git a/pl/math/s_log1pf_2u1.c b/pl/math/s_log1pf_2u1.c
new file mode 100644
index 0000000..013ec4c
--- /dev/null
+++ b/pl/math/s_log1pf_2u1.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log1pf_2u1.c"
diff --git a/pl/math/s_log2_3u.c b/pl/math/s_log2_3u.c
new file mode 100644
index 0000000..d46f3f9
--- /dev/null
+++ b/pl/math/s_log2_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log2_3u.c"
diff --git a/pl/math/s_log2f_2u5.c b/pl/math/s_log2f_2u5.c
new file mode 100644
index 0000000..e76c67d
--- /dev/null
+++ b/pl/math/s_log2f_2u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_log2f_2u5.c"
diff --git a/pl/math/s_sinh_3u.c b/pl/math/s_sinh_3u.c
new file mode 100644
index 0000000..27e5e65
--- /dev/null
+++ b/pl/math/s_sinh_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_sinh_3u.c"
diff --git a/pl/math/s_sinhf_2u3.c b/pl/math/s_sinhf_2u3.c
new file mode 100644
index 0000000..607f942
--- /dev/null
+++ b/pl/math/s_sinhf_2u3.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_sinhf_2u3.c"
diff --git a/pl/math/s_tan_3u5.c b/pl/math/s_tan_3u5.c
new file mode 100644
index 0000000..adb807c
--- /dev/null
+++ b/pl/math/s_tan_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_tan_3u5.c"
diff --git a/pl/math/s_tanf_3u5.c b/pl/math/s_tanf_3u5.c
new file mode 100644
index 0000000..fa64c8a
--- /dev/null
+++ b/pl/math/s_tanf_3u5.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_tanf_3u5.c"
diff --git a/pl/math/s_tanh_3u.c b/pl/math/s_tanh_3u.c
new file mode 100644
index 0000000..a4d7bce
--- /dev/null
+++ b/pl/math/s_tanh_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_tanh_3u.c"
diff --git a/pl/math/s_tanhf_2u6.c b/pl/math/s_tanhf_2u6.c
new file mode 100644
index 0000000..896fc62
--- /dev/null
+++ b/pl/math/s_tanhf_2u6.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_tanhf_2u6.c"
diff --git a/pl/math/sinh_3u.c b/pl/math/sinh_3u.c
new file mode 100644
index 0000000..f534815
--- /dev/null
+++ b/pl/math/sinh_3u.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound \
+ 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \
+ in NaN. */
+
+double
+__exp_dd (double, double);
+
+/* Approximation for double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The sign of x is folded into halfsign (+/-0.5) so that the remaining
+ arithmetic is done on |x|. NaN inputs are handed to the
+ double-precision __math_invalid so the argument is never narrowed
+ to float.
+ The greatest observed error is 2.57 ULP:
+ __v_sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+double
+sinh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iax = ix & AbsMask;
+ double ax = asdouble (iax);
+ uint64_t sign = ix & ~AbsMask;
+ double halfsign = asdouble (Half | sign);
+
+ if (unlikely (iax >= OFlowBound))
+ {
+ /* Special values and overflow. Any bit pattern above that of
+ +infinity is a NaN. */
+ if (unlikely (iax > 0x7ff0000000000000))
+ return __math_invalid (x);
+ /* expm1 overflows a little before sinh. We have to fill this
+ gap by using a different algorithm, in this case we use a
+ double-precision exp helper. For large x sinh(x) is dominated
+ by exp(x), however we cannot compute exp without overflow
+ either. We use the identity: exp(a) = (exp(a / 2)) ^ 2
+ to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0
+ ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. */
+ double e = __exp_dd (ax / 2, 0);
+ return (e * halfsign) * e;
+ }
+
+ /* Use expm1 to retain acceptable precision for small numbers.
+ Let t = e^(|x|) - 1. */
+ double t = expm1 (ax);
+ /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0
+ (t + t / (t + 1)) / -2 for x < 0. */
+ return (t + t / (t + 1)) * halfsign;
+}
+
+PL_SIG (S, D, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (sinh, 2.08)
+PL_TEST_INTERVAL (sinh, 0, 0x1p-51, 100)
+PL_TEST_INTERVAL (sinh, -0, -0x1p-51, 100)
+PL_TEST_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000)
+PL_TEST_INTERVAL (sinh, -0x1p-51, -0x1.62e42fefa39fp+9, 100000)
+PL_TEST_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000)
+PL_TEST_INTERVAL (sinh, -0x1.62e42fefa39fp+9, -inf, 1000)
diff --git a/pl/math/sinhf_2u3.c b/pl/math/sinhf_2u3.c
new file mode 100644
index 0000000..de94428
--- /dev/null
+++ b/pl/math/sinhf_2u3.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define Expm1OFlowLimit \
+ 0x42b17218 /* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f \
+ overflows. */
+#define OFlowLimit \
+ 0x42b2d4fd /* 0x1.65a9fap+6, minimum positive value for which sinhf should \
+ overflow. */
+
+float
+optr_aor_exp_f32 (float);
+
+/* Approximation for single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The sign of x is folded into halfsign (+/-0.5) so that the remaining
+ arithmetic is done on |x|.
+ The maximum error is 2.26 ULP:
+ sinhf(0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. */
+float
+sinhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ float ax = asfloat (iax);
+ uint32_t sign = ix & ~AbsMask;
+ float halfsign = asfloat (Half | sign);
+
+ if (unlikely (iax >= Expm1OFlowLimit))
+ {
+ /* Special values and overflow. Encodings from 0x7fc00001 upwards and
+ +/-infinity are returned unchanged, the remaining NaN encodings go
+ through __math_invalidf, and finite inputs at or beyond OFlowLimit
+ are reported through __math_oflowf (which receives the sign bit). */
+ if (iax >= 0x7fc00001 || iax == 0x7f800000)
+ return x;
+ if (iax >= 0x7f800000)
+ return __math_invalidf (x);
+ if (iax >= OFlowLimit)
+ return __math_oflowf (sign);
+
+ /* expm1f overflows a little before sinhf, (~88.7 vs ~89.4). We have to
+ fill this gap by using a different algorithm, in this case we use a
+ single-precision exp helper (optr_aor_exp_f32). For large x sinh(x) is
+ dominated by exp(x), however we cannot compute exp without overflow
+ either. We use the identity:
+ exp(a) = (exp(a / 2)) ^ 2.
+ to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0
+ ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0.
+ Greatest error in this region is 1.89 ULP:
+ sinhf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. */
+ float e = optr_aor_exp_f32 (ax / 2);
+ return (e * halfsign) * e;
+ }
+
+ /* Use expm1f to retain acceptable precision for small numbers.
+ Let t = e^(|x|) - 1. */
+ float t = expm1f (ax);
+ /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0
+ (t + t / (t + 1)) / -2 for x < 0. */
+ return (t + t / (t + 1)) * halfsign;
+}
+
+PL_SIG (S, F, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (sinhf, 1.76)
+PL_TEST_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000)
+PL_TEST_INTERVAL (sinhf, -0, -0x1.62e43p+6, 100000)
+PL_TEST_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100)
+PL_TEST_INTERVAL (sinhf, -0x1.62e43p+6, -0x1.65a9fap+6, 100)
+PL_TEST_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100)
+PL_TEST_INTERVAL (sinhf, -0x1.65a9fap+6, -inf, 100)
diff --git a/pl/math/sv_atan2_2u5.c b/pl/math/sv_atan2_2u5.c
new file mode 100644
index 0000000..a4bea1d
--- /dev/null
+++ b/pl/math/sv_atan2_2u5.c
@@ -0,0 +1,93 @@
+/*
+ * Double-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#include "sv_atan_common.h"
+
+/* Useful constants. */
+#define PiOver2 sv_f64 (0x1.921fb54442d18p+0)
+#define SignMask sv_u64 (0x8000000000000000)
+
+/* Slow path for lanes whose inputs are 0, infinity or nan: forward both
+ arguments, the fast-path result and the predicate of flagged lanes to
+ sv_call2_f64 so that scalar atan2 handles them. */
+__attribute__ ((noinline)) static sv_f64_t
+specialcase (sv_f64_t y, sv_f64_t x, sv_f64_t fast, const svbool_t special)
+{
+ return sv_call2_f64 (atan2, y, x, fast, special);
+}
+
+/* Builds a predicate that is true for lanes whose bit pattern encodes
+ 0, infinity or nan (of either sign). */
+static inline svbool_t
+zeroinfnan (sv_u64_t i, const svbool_t pg)
+{
+ /* Doubling the bits discards the sign. Subtracting one then wraps a
+ zero input round to the largest unsigned value, so a single unsigned
+ compare catches zero together with everything from infinity up. */
+ sv_u64_t doubled = svlsl_n_u64_x (pg, i, 1);
+ sv_u64_t biased = svsub_n_u64_x (pg, doubled, 1);
+ sv_u64_t limit = sv_u64 (2 * asuint64 (INFINITY) - 1);
+ return svcmpge_u64 (pg, biased, limit);
+}
+
+/* Fast implementation of SVE atan2. Errors are greatest when y and
+ x are reasonably close together. The fast path is evaluated for every
+ lane in pg; if any lane of x or y is 0, infinity or nan, those lanes
+ are then redone through specialcase.
+ The greatest observed error is 2.28 ULP:
+ sv_atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
+ got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
+sv_f64_t
+__sv_atan2_x (sv_f64_t y, sv_f64_t x, const svbool_t pg)
+{
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_u64_t iy = sv_as_u64_f64 (y);
+
+ svbool_t cmp_x = zeroinfnan (ix, pg);
+ svbool_t cmp_y = zeroinfnan (iy, pg);
+ svbool_t cmp_xy = svorr_b_z (pg, cmp_x, cmp_y);
+
+ sv_u64_t sign_x = svand_u64_x (pg, ix, SignMask);
+ sv_u64_t sign_y = svand_u64_x (pg, iy, SignMask);
+ sv_u64_t sign_xy = sveor_u64_x (pg, sign_x, sign_y);
+
+ sv_f64_t ax = svabs_f64_x (pg, x);
+ sv_f64_t ay = svabs_f64_x (pg, y);
+
+ svbool_t pred_xlt0 = svcmplt_f64 (pg, x, sv_f64 (0.0));
+ svbool_t pred_aygtax = svcmpgt_f64 (pg, ay, ax);
+
+ /* Set up z for call to atan: z = -|x| / |y| where |y| > |x|,
+ and z = |y| / |x| otherwise, so that |z| <= 1. */
+ sv_f64_t n = svsel_f64 (pred_aygtax, svneg_f64_x (pg, ax), ay);
+ sv_f64_t d = svsel_f64 (pred_aygtax, ay, ax);
+ sv_f64_t z = svdiv_f64_x (pg, n, d);
+
+ /* Work out the correct shift: (-2 if x < 0, else 0) plus 1 where
+ |y| > |x|, all scaled by pi/2. Passing pg as the reduction predicate
+ below applies the shift to every active lane. */
+ sv_f64_t shift = svsel_f64 (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
+ shift = svsel_f64 (pred_aygtax, svadd_n_f64_x (pg, shift, 1.0), shift);
+ shift = svmul_f64_x (pg, shift, PiOver2);
+
+ sv_f64_t ret = __sv_atan_common (pg, pg, z, z, shift);
+
+ /* Account for the sign of x and y: the sign bit of ret is flipped
+ where x and y have opposite signs. */
+ ret = sv_as_f64_u64 (sveor_u64_x (pg, sv_as_u64_f64 (ret), sign_xy));
+
+ if (unlikely (svptest_any (pg, cmp_xy)))
+ {
+ return specialcase (y, x, ret, cmp_xy);
+ }
+
+ return ret;
+}
+
+PL_ALIAS (__sv_atan2_x, _ZGVsMxvv_atan2)
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (SV, D, 2, atan2)
+PL_TEST_ULP (__sv_atan2, 1.78)
+PL_TEST_INTERVAL (__sv_atan2, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (__sv_atan2, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2, 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/sv_atan2f_3u.c b/pl/math/sv_atan2f_3u.c
new file mode 100644
index 0000000..f7674c4
--- /dev/null
+++ b/pl/math/sv_atan2f_3u.c
@@ -0,0 +1,94 @@
+/*
+ * Single-precision vector atan2f(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#include "sv_atanf_common.h"
+
+/* Useful constants. */
+#define PiOver2 sv_f32 (0x1.921fb6p+0f)
+#define SignMask sv_u32 (0x80000000)
+
+/* Slow path for lanes whose inputs are 0, infinity or nan: forward both
+ arguments, the fast-path result and the predicate of flagged lanes to
+ sv_call2_f32 so that scalar atan2f handles them. */
+static inline sv_f32_t
+specialcase (sv_f32_t y, sv_f32_t x, sv_f32_t fast, const svbool_t special)
+{
+ return sv_call2_f32 (atan2f, y, x, fast, special);
+}
+
+/* Builds a predicate that is true for lanes whose bit pattern encodes
+ 0, infinity or nan (of either sign). */
+static inline svbool_t
+zeroinfnan (sv_u32_t i, const svbool_t pg)
+{
+ /* Doubling the bits discards the sign. Subtracting one then wraps a
+ zero input round to the largest unsigned value, so a single unsigned
+ compare catches zero together with everything from infinity up. */
+ sv_u32_t doubled = svlsl_n_u32_x (pg, i, 1);
+ sv_u32_t biased = svsub_n_u32_x (pg, doubled, 1);
+ sv_u32_t limit = sv_u32 (2 * 0x7f800000lu - 1);
+ return svcmpge_u32 (pg, biased, limit);
+}
+
+/* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 * P(z^2)
+ with reduction to [0,1] using z=1/x and shift = pi/2.
+ The fast path is evaluated for every lane in pg; if any lane of x or y
+ is 0, infinity or nan, those lanes are then redone through specialcase.
+ Maximum observed error is 2.95 ULP:
+ __sv_atan2f(0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
+ want 0x1.967f00p-1. */
+sv_f32_t
+__sv_atan2f_x (sv_f32_t y, sv_f32_t x, const svbool_t pg)
+{
+ sv_u32_t ix = sv_as_u32_f32 (x);
+ sv_u32_t iy = sv_as_u32_f32 (y);
+
+ svbool_t cmp_x = zeroinfnan (ix, pg);
+ svbool_t cmp_y = zeroinfnan (iy, pg);
+ svbool_t cmp_xy = svorr_b_z (pg, cmp_x, cmp_y);
+
+ sv_u32_t sign_x = svand_u32_x (pg, ix, SignMask);
+ sv_u32_t sign_y = svand_u32_x (pg, iy, SignMask);
+ sv_u32_t sign_xy = sveor_u32_x (pg, sign_x, sign_y);
+
+ sv_f32_t ax = svabs_f32_x (pg, x);
+ sv_f32_t ay = svabs_f32_x (pg, y);
+
+ svbool_t pred_xlt0 = svcmplt_f32 (pg, x, sv_f32 (0.0));
+ svbool_t pred_aygtax = svcmpgt_f32 (pg, ay, ax);
+
+ /* Set up z for call to atan: z = -|x| / |y| where |y| > |x|,
+ and z = |y| / |x| otherwise, so that |z| <= 1. */
+ sv_f32_t n = svsel_f32 (pred_aygtax, svneg_f32_x (pg, ax), ay);
+ sv_f32_t d = svsel_f32 (pred_aygtax, ay, ax);
+ sv_f32_t z = svdiv_f32_x (pg, n, d);
+
+ /* Work out the correct shift: (-2 if x < 0, else 0) plus 1 where
+ |y| > |x|, all scaled by pi/2. Passing pg as the reduction predicate
+ below applies the shift to every active lane. */
+ sv_f32_t shift = svsel_f32 (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
+ shift = svsel_f32 (pred_aygtax, svadd_n_f32_x (pg, shift, 1.0), shift);
+ shift = svmul_f32_x (pg, shift, PiOver2);
+
+ sv_f32_t ret = __sv_atanf_common (pg, pg, z, z, shift);
+
+ /* Account for the sign of x and y: the sign bit of ret is flipped
+ where x and y have opposite signs. */
+ ret = sv_as_f32_u32 (sveor_u32_x (pg, sv_as_u32_f32 (ret), sign_xy));
+
+ if (unlikely (svptest_any (pg, cmp_xy)))
+ {
+ return specialcase (y, x, ret, cmp_xy);
+ }
+
+ return ret;
+}
+
+PL_ALIAS (__sv_atan2f_x, _ZGVsMxvv_atan2f)
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (SV, F, 2, atan2)
+PL_TEST_ULP (__sv_atan2f, 2.45)
+PL_TEST_INTERVAL (__sv_atan2f, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (__sv_atan2f, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2f, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2f, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (__sv_atan2f, 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/sv_atan_2u5.c b/pl/math/sv_atan_2u5.c
new file mode 100644
index 0000000..02ac331
--- /dev/null
+++ b/pl/math/sv_atan_2u5.c
@@ -0,0 +1,62 @@
+/*
+ * Double-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#include "sv_atan_common.h"
+
+/* Useful constants. */
+#define PiOver2 sv_f64 (0x1.921fb54442d18p+0)
+#define AbsMask (0x7fffffffffffffff)
+
+/* Fast implementation of SVE atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=1/x and shift = pi/2. Largest errors are close to 1. The sign of x is
+ stripped up front and xor-ed back into the result at the end.
+ The maximum observed error is 2.27 ulps:
+ __sv_atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+sv_f64_t
+__sv_atan_x (sv_f64_t x, const svbool_t pg)
+{
+ /* No need to trigger special case. Small cases, infs and nans
+ are supported by our approximation technique. */
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_u64_t sign = svand_n_u64_x (pg, ix, ~AbsMask);
+
+ /* Argument reduction:
+ y := arctan(x) for |x| <= 1
+ y := pi/2 + arctan(-1/x) for |x| > 1
+ Hence, use z=-1/x where |x| > 1 (the lanes flagged in red by the
+ absolute compare below), otherwise z=x. */
+ svbool_t red = svacgt_n_f64 (pg, x, 1.0);
+ /* Avoid dependency in abs(x) in division (and comparison). */
+ sv_f64_t z = svsel_f64 (red, svdiv_f64_x (pg, sv_f64 (-1.0), x), x);
+ /* Use absolute value only when needed (odd powers of z). On reduced
+ lanes az is negated, i.e. az = -|z| there and |z| elsewhere. */
+ sv_f64_t az = svabs_f64_x (pg, z);
+ az = svneg_f64_m (az, red, az);
+
+ sv_f64_t y = __sv_atan_common (pg, red, z, az, PiOver2);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = sv_as_f64_u64 (sveor_u64_x (pg, sv_as_u64_f64 (y), sign));
+
+ return y;
+}
+
+PL_ALIAS (__sv_atan_x, _ZGVsMxv_atan)
+
+PL_SIG (SV, D, 1, atan, -3.1, 3.1)
+PL_TEST_ULP (__sv_atan, 1.78)
+PL_TEST_INTERVAL (__sv_atan, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (__sv_atan, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atan, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (__sv_atan, 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/sv_atan_common.h b/pl/math/sv_atan_common.h
new file mode 100644
index 0000000..bfe6998
--- /dev/null
+++ b/pl/math/sv_atan_common.h
@@ -0,0 +1,61 @@
+/*
+ * Double-precision polynomial evaluation function for SVE atan(x) and
+ * atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_SV_ATAN_COMMON_H
+#define PL_MATH_SV_ATAN_COMMON_H
+
+#include "math_config.h"
+#include "sv_math.h"
+
+#define P(i) sv_f64 (__atan_poly_data.poly[i])
+
+/* Polynomial used in fast SVE atan(x) and atan2(y,x) implementations
+ The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
+ pg governs every arithmetic operation, red selects the lanes that
+ receive shift, z supplies the even powers and az the odd-power factor.
+ Returns az + z^2 * az * P(z^2), plus shift on the lanes set in red. */
+static inline sv_f64_t
+__sv_atan_common (svbool_t pg, svbool_t red, sv_f64_t z, sv_f64_t az,
+ sv_f64_t shift)
+{
+ /* Use full Estrin scheme for P(z^2) with deg(P)=19. */
+ sv_f64_t z2 = svmul_f64_x (pg, z, z);
+
+ /* Level 1: pair adjacent coefficients, linear in z^2. */
+ sv_f64_t P_1_0 = sv_fma_f64_x (pg, P (1), z2, P (0));
+ sv_f64_t P_3_2 = sv_fma_f64_x (pg, P (3), z2, P (2));
+ sv_f64_t P_5_4 = sv_fma_f64_x (pg, P (5), z2, P (4));
+ sv_f64_t P_7_6 = sv_fma_f64_x (pg, P (7), z2, P (6));
+ sv_f64_t P_9_8 = sv_fma_f64_x (pg, P (9), z2, P (8));
+ sv_f64_t P_11_10 = sv_fma_f64_x (pg, P (11), z2, P (10));
+ sv_f64_t P_13_12 = sv_fma_f64_x (pg, P (13), z2, P (12));
+ sv_f64_t P_15_14 = sv_fma_f64_x (pg, P (15), z2, P (14));
+ sv_f64_t P_17_16 = sv_fma_f64_x (pg, P (17), z2, P (16));
+ sv_f64_t P_19_18 = sv_fma_f64_x (pg, P (19), z2, P (18));
+
+ /* Level 2. Note x2 is the square of z2, i.e. z^4. */
+ sv_f64_t x2 = svmul_f64_x (pg, z2, z2);
+ sv_f64_t P_3_0 = sv_fma_f64_x (pg, P_3_2, x2, P_1_0);
+ sv_f64_t P_7_4 = sv_fma_f64_x (pg, P_7_6, x2, P_5_4);
+ sv_f64_t P_11_8 = sv_fma_f64_x (pg, P_11_10, x2, P_9_8);
+ sv_f64_t P_15_12 = sv_fma_f64_x (pg, P_15_14, x2, P_13_12);
+ sv_f64_t P_19_16 = sv_fma_f64_x (pg, P_19_18, x2, P_17_16);
+
+ /* Level 3. x4 holds z^8. */
+ sv_f64_t x4 = svmul_f64_x (pg, x2, x2);
+ sv_f64_t P_7_0 = sv_fma_f64_x (pg, P_7_4, x4, P_3_0);
+ sv_f64_t P_15_8 = sv_fma_f64_x (pg, P_15_12, x4, P_11_8);
+
+ /* Level 4. x8 holds z^16. */
+ sv_f64_t x8 = svmul_f64_x (pg, x4, x4);
+ sv_f64_t y = sv_fma_f64_x (pg, P_19_16, x8, P_15_8);
+ y = sv_fma_f64_x (pg, y, x8, P_7_0);
+
+ /* Finalize. y = shift + z + z^3 * P(z^2). */
+ sv_f64_t z3 = svmul_f64_x (pg, z2, az);
+ y = sv_fma_f64_x (pg, y, z3, az);
+
+ /* Apply shift as indicated by `red` predicate. */
+ y = svadd_f64_m (red, y, shift);
+
+ return y;
+}
+
+#endif // PL_MATH_SV_ATAN_COMMON_H
diff --git a/pl/math/sv_atanf_2u9.c b/pl/math/sv_atanf_2u9.c
new file mode 100644
index 0000000..8d38e42
--- /dev/null
+++ b/pl/math/sv_atanf_2u9.c
@@ -0,0 +1,59 @@
+/*
+ * Single-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#include "sv_atanf_common.h"
+
+#define PiOver2 sv_f32 (0x1.921fb6p+0f)
+#define AbsMask (0x7fffffff)
+
+/* Fast implementation of SVE atanf based on
+ atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=-1/x and shift = pi/2. The sign of x is stripped up front and
+ xor-ed back into the result at the end.
+ Largest observed error is 2.9 ULP, close to +/-1.0:
+ __sv_atanf(0x1.0468f6p+0) got -0x1.967f06p-1
+ want -0x1.967fp-1. */
+sv_f32_t
+__sv_atanf_x (sv_f32_t x, const svbool_t pg)
+{
+ /* No need to trigger special case. Small cases, infs and nans
+ are supported by our approximation technique. */
+ sv_u32_t ix = sv_as_u32_f32 (x);
+ sv_u32_t sign = svand_n_u32_x (pg, ix, ~AbsMask);
+
+ /* Argument reduction:
+ y := arctan(x) for |x| <= 1
+ y := pi/2 + arctan(-1/x) for |x| > 1
+ Hence, use z=-1/x where |x| > 1 (the lanes flagged in red by the
+ absolute compare below), otherwise z=x. */
+ svbool_t red = svacgt_n_f32 (pg, x, 1.0f);
+ /* Avoid dependency in abs(x) in division (and comparison). */
+ sv_f32_t z = svsel_f32 (red, svdiv_f32_x (pg, sv_f32 (-1.0f), x), x);
+ /* Use absolute value only when needed (odd powers of z). On reduced
+ lanes az is negated, i.e. az = -|z| there and |z| elsewhere. */
+ sv_f32_t az = svabs_f32_x (pg, z);
+ az = svneg_f32_m (az, red, az);
+
+ sv_f32_t y = __sv_atanf_common (pg, red, z, az, PiOver2);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ return sv_as_f32_u32 (sveor_u32_x (pg, sv_as_u32_f32 (y), sign));
+}
+
+PL_ALIAS (__sv_atanf_x, _ZGVsMxv_atanf)
+
+PL_SIG (SV, F, 1, atan, -3.1, 3.1)
+PL_TEST_ULP (__sv_atanf, 2.9)
+PL_TEST_INTERVAL (__sv_atanf, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (__sv_atanf, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atanf, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (__sv_atanf, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (__sv_atanf, 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/sv_atanf_common.h b/pl/math/sv_atanf_common.h
new file mode 100644
index 0000000..dc45eff
--- /dev/null
+++ b/pl/math/sv_atanf_common.h
@@ -0,0 +1,47 @@
+/*
+ * Single-precision polynomial evaluation function for SVE atan(x) and
+ * atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_SV_ATANF_COMMON_H
+#define PL_MATH_SV_ATANF_COMMON_H
+
+#include "math_config.h"
+#include "sv_math.h"
+
+#define P(i) sv_f32 (__atanf_poly_data.poly[i])
+
+/* Polynomial used in fast SVE atanf(x) and atan2f(y,x) implementations
+ The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
+ pg governs every arithmetic operation, red selects the lanes that
+ receive shift, z supplies the even powers and az the odd-power factor.
+ Returns az + z^2 * az * P(z^2), plus shift on the lanes set in red. */
+static inline sv_f32_t
+__sv_atanf_common (svbool_t pg, svbool_t red, sv_f32_t z, sv_f32_t az,
+ sv_f32_t shift)
+{
+ /* Use full Estrin scheme for P(z^2) with deg(P)=7. */
+
+ /* First compute square powers of z. */
+ sv_f32_t z2 = svmul_f32_x (pg, z, z);
+ sv_f32_t z4 = svmul_f32_x (pg, z2, z2);
+ sv_f32_t z8 = svmul_f32_x (pg, z4, z4);
+
+ /* Then assemble polynomial: p_4_7 combines coefficients 4..7 and
+ p_0_3 coefficients 0..3, joined below with a factor of z8. */
+ sv_f32_t p_4_7 = sv_fma_f32_x (pg, z4, (sv_fma_f32_x (pg, z2, P (7), P (6))),
+ (sv_fma_f32_x (pg, z2, P (5), P (4))));
+ sv_f32_t p_0_3 = sv_fma_f32_x (pg, z4, (sv_fma_f32_x (pg, z2, P (3), P (2))),
+ (sv_fma_f32_x (pg, z2, P (1), P (0))));
+ sv_f32_t y = sv_fma_f32_x (pg, z8, p_4_7, p_0_3);
+
+ /* Finalize. y = shift + z + z^3 * P(z^2). */
+ sv_f32_t z3 = svmul_f32_x (pg, z2, az);
+ y = sv_fma_f32_x (pg, y, z3, az);
+
+ /* Apply shift as indicated by 'red' predicate. */
+ y = svadd_f32_m (red, y, shift);
+
+ return y;
+}
+
+#endif // PL_MATH_SV_ATANF_COMMON_H
diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c
new file mode 100644
index 0000000..1940348
--- /dev/null
+++ b/pl/math/sv_cos_2u5.c
@@ -0,0 +1,84 @@
+/*
+ * Double-precision SVE cos(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define InvPio2 (sv_f64 (0x1.45f306dc9c882p-1))
+#define NegPio2_1 (sv_f64 (-0x1.921fb50000000p+0))
+#define NegPio2_2 (sv_f64 (-0x1.110b460000000p-26))
+#define NegPio2_3 (sv_f64 (-0x1.1a62633145c07p-54))
+/* Original shift used in Neon cos,
+ plus a contribution to set the bit #0 of q
+ as expected by trigonometric instructions. */
+#define Shift (sv_f64 (0x1.8000000000001p52))
+#define RangeVal (sv_f64 (0x1p23))
+#define AbsMask (0x7fffffffffffffff)
+
+static NOINLINE sv_f64_t
+__sv_cos_specialcase (sv_f64_t input, sv_f64_t fast, svbool_t special)
+{
+ /* Out-of-line slow path: pass the input, the fast-path result and the
+ predicate of flagged lanes to sv_call_f64 with scalar cos. */
+ return sv_call_f64 (cos, input, fast, special);
+}
+
+/* A fast SVE implementation of cos based on trigonometric
+ instructions (FTMAD, FTSSEL, FTSMUL). Lanes with |x| >= RangeVal
+ (by unsigned compare of the bit patterns) are redone through
+ __sv_cos_specialcase.
+ Maximum measured error: 2.108 ULPs.
+ __sv_cos(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3
+ want -0x1.fddd4c65c7f05p-3. */
+sv_f64_t
+__sv_cos_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_f64_t n, r, r2, y;
+ svbool_t cmp;
+
+ r = sv_as_f64_u64 (svand_n_u64_x (pg, sv_as_u64_f64 (x), AbsMask));
+ cmp = svcmpge_u64 (pg, sv_as_u64_f64 (r), sv_as_u64_f64 (RangeVal));
+
+ /* n = rint(|x|/(pi/2)). Adding Shift leaves the rounded quotient in
+ the low bits of q; subtracting it again recovers n as a double. */
+ sv_f64_t q = sv_fma_f64_x (pg, InvPio2, r, Shift);
+ n = svsub_f64_x (pg, q, Shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4), with pi/2
+ split across three constants. */
+ r = sv_fma_f64_x (pg, NegPio2_1, n, r);
+ r = sv_fma_f64_x (pg, NegPio2_2, n, r);
+ r = sv_fma_f64_x (pg, NegPio2_3, n, r);
+
+ /* cos(r) poly approx. Coefficients are supplied by the svtmad
+ immediates, stepped from 7 down to 0. */
+ r2 = svtsmul_f64 (r, sv_as_u64_f64 (q));
+ y = sv_f64 (0.0);
+ y = svtmad_f64 (y, r2, 7);
+ y = svtmad_f64 (y, r2, 6);
+ y = svtmad_f64 (y, r2, 5);
+ y = svtmad_f64 (y, r2, 4);
+ y = svtmad_f64 (y, r2, 3);
+ y = svtmad_f64 (y, r2, 2);
+ y = svtmad_f64 (y, r2, 1);
+ y = svtmad_f64 (y, r2, 0);
+
+ /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */
+ sv_f64_t f = svtssel_f64 (r, sv_as_u64_f64 (q));
+ /* Apply factor. */
+ y = svmul_f64_x (pg, f, y);
+
+ /* No need to pass pg to specialcase here since cmp is a strict subset,
+ guaranteed by the cmpge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_cos_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_cos_x, _ZGVsMxv_cos)
+
+PL_SIG (SV, D, 1, cos, -3.1, 3.1)
+PL_TEST_ULP (__sv_cos, 1.61)
+PL_TEST_INTERVAL (__sv_cos, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (__sv_cos, 0x1p-4, 0x1p4, 500000)
+#endif
diff --git a/pl/math/sv_cosf_2u1.c b/pl/math/sv_cosf_2u1.c
new file mode 100644
index 0000000..8f138bc
--- /dev/null
+++ b/pl/math/sv_cosf_2u1.c
@@ -0,0 +1,82 @@
+/*
+ * Single-precision SVE cos(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define NegPio2_1 (sv_f32 (-0x1.921fb6p+0f))
+#define NegPio2_2 (sv_f32 (0x1.777a5cp-25f))
+#define NegPio2_3 (sv_f32 (0x1.ee59dap-50f))
+#define RangeVal (sv_f32 (0x1p20f))
+#define InvPio2 (sv_f32 (0x1.45f306p-1f))
+/* Original shift used in Neon cosf,
+ plus a contribution to set the bit #0 of q
+ as expected by trigonometric instructions. */
+#define Shift (sv_f32 (0x1.800002p+23f))
+#define AbsMask (0x7fffffff)
+
+static NOINLINE sv_f32_t
+__sv_cosf_specialcase (sv_f32_t input, sv_f32_t fast, svbool_t special)
+{
+ /* Out-of-line slow path: pass the input, the fast-path result and the
+ predicate of flagged lanes to sv_call_f32 with scalar cosf. */
+ return sv_call_f32 (cosf, input, fast, special);
+}
+
+/* A fast SVE implementation of cosf based on trigonometric
+ instructions (FTMAD, FTSSEL, FTSMUL). Lanes with |x| >= RangeVal
+ (by unsigned compare of the bit patterns) are redone through
+ __sv_cosf_specialcase.
+ Maximum measured error: 2.06 ULPs.
+ __sv_cosf(0x1.dea2f2p+19) got 0x1.fffe7ap-6
+ want 0x1.fffe76p-6. */
+sv_f32_t
+__sv_cosf_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_f32_t n, r, r2, y;
+ svbool_t cmp;
+
+ r = sv_as_f32_u32 (svand_n_u32_x (pg, sv_as_u32_f32 (x), AbsMask));
+ cmp = svcmpge_u32 (pg, sv_as_u32_f32 (r), sv_as_u32_f32 (RangeVal));
+
+ /* n = rint(|x|/(pi/2)). Adding Shift leaves the rounded quotient in
+ the low bits of q; subtracting it again recovers n as a float. */
+ sv_f32_t q = sv_fma_f32_x (pg, InvPio2, r, Shift);
+ n = svsub_f32_x (pg, q, Shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4), with pi/2
+ split across three constants. */
+ r = sv_fma_f32_x (pg, NegPio2_1, n, r);
+ r = sv_fma_f32_x (pg, NegPio2_2, n, r);
+ r = sv_fma_f32_x (pg, NegPio2_3, n, r);
+
+ /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */
+ sv_f32_t f = svtssel_f32 (r, sv_as_u32_f32 (q));
+
+ /* cos(r) poly approx. Coefficients are supplied by the svtmad
+ immediates, stepped from 4 down to 0. */
+ r2 = svtsmul_f32 (r, sv_as_u32_f32 (q));
+ y = sv_f32 (0.0f);
+ y = svtmad_f32 (y, r2, 4);
+ y = svtmad_f32 (y, r2, 3);
+ y = svtmad_f32 (y, r2, 2);
+ y = svtmad_f32 (y, r2, 1);
+ y = svtmad_f32 (y, r2, 0);
+
+ /* Apply factor. */
+ y = svmul_f32_x (pg, f, y);
+
+ /* No need to pass pg to specialcase here since cmp is a strict subset,
+ guaranteed by the cmpge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_cosf_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_cosf_x, _ZGVsMxv_cosf)
+
+PL_SIG (SV, F, 1, cos, -3.1, 3.1)
+PL_TEST_ULP (__sv_cosf, 1.57)
+PL_TEST_INTERVAL (__sv_cosf, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (__sv_cosf, 0x1p-4, 0x1p4, 500000)
+#endif
diff --git a/pl/math/sv_erf_3u.c b/pl/math/sv_erf_3u.c
new file mode 100644
index 0000000..bec7f8a
--- /dev/null
+++ b/pl/math/sv_erf_3u.c
@@ -0,0 +1,103 @@
+/*
+ * Double-precision SVE erf(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define Scale (8.0)
+#define AbsMask (0x7fffffffffffffff)
+
+static NOINLINE sv_f64_t
+__sv_erf_specialcase (sv_f64_t x, sv_f64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (erf, x, y, cmp);
+}
+
+/* Optimized double precision SVE error function erf.
+ Maximum observed error is 2.62 ULP:
+ __sv_erf(0x1.79cab7e3078fap+2) got 0x1.0000000000001p+0
+ want 0x1.fffffffffffffp-1. */
+sv_f64_t
+__sv_erf_x (sv_f64_t x, const svbool_t pg)
+{
+ /* Use top 16 bits to test for special cases and small values. */
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_u64_t atop = svand_n_u64_x (pg, svlsr_n_u64_x (pg, ix, 48), 0x7fff);
+
+ /* Handle both inf/nan as well as small values (|x|<2^-28). */
+ svbool_t cmp
+ = svcmpge_n_u64 (pg, svsub_n_u64_x (pg, atop, 0x3e30), 0x7ff0 - 0x3e30);
+
+ /* Get sign and absolute value. */
+ sv_f64_t a = sv_as_f64_u64 (svand_n_u64_x (pg, ix, AbsMask));
+ sv_u64_t sign = svand_n_u64_x (pg, ix, ~AbsMask);
+
+ /* i = trunc(Scale*|x|). */
+ sv_f64_t a_scale = svmul_n_f64_x (pg, a, Scale);
+ /* Saturate index of intervals: lanes outside a_lt_6 get V_ERF_NINTS - 1. */
+ svbool_t a_lt_6 = svcmplt_n_u64 (pg, atop, 0x4018);
+ sv_u64_t i = svcvt_u64_f64_m (sv_u64 (V_ERF_NINTS - 1), a_lt_6, a_scale);
+
+ /* Load polynomial coefficients. */
+ sv_f64_t P_0 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[0], i);
+ sv_f64_t P_1 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[1], i);
+ sv_f64_t P_2 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[2], i);
+ sv_f64_t P_3 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[3], i);
+ sv_f64_t P_4 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[4], i);
+ sv_f64_t P_5 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[5], i);
+ sv_f64_t P_6 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[6], i);
+ sv_f64_t P_7 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[7], i);
+ sv_f64_t P_8 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[8], i);
+ sv_f64_t P_9 = sv_lookup_f64_x (pg, __v_erf_data.coeffs[9], i);
+
+ /* Get shift (the scale is the constant Scale). */
+ sv_f64_t shift = sv_lookup_f64_x (pg, __v_erf_data.shifts, i);
+
+ /* Transform polynomial variable: z = shift + Scale*|x|.
+ Lanes outside a_lt_6 keep z = shift, which avoids overflow. */
+ sv_f64_t z = svmla_f64_m (a_lt_6, shift, sv_f64 (Scale), a);
+
+ /* Evaluate polynomial P(z) using level-2 Estrin. */
+ sv_f64_t r1 = sv_fma_f64_x (pg, z, P_1, P_0);
+ sv_f64_t r2 = sv_fma_f64_x (pg, z, P_3, P_2);
+ sv_f64_t r3 = sv_fma_f64_x (pg, z, P_5, P_4);
+ sv_f64_t r4 = sv_fma_f64_x (pg, z, P_7, P_6);
+ sv_f64_t r5 = sv_fma_f64_x (pg, z, P_9, P_8);
+
+ sv_f64_t z2 = svmul_f64_x (pg, z, z);
+ sv_f64_t z4 = svmul_f64_x (pg, z2, z2);
+
+ sv_f64_t q2 = sv_fma_f64_x (pg, r4, z2, r3);
+ sv_f64_t q1 = sv_fma_f64_x (pg, r2, z2, r1);
+
+ sv_f64_t y = sv_fma_f64_x (pg, z4, r5, q2);
+ y = sv_fma_f64_x (pg, z4, y, q1);
+
+ /* y = erf(x) if x > 0, -erf(-x) otherwise. */
+ y = sv_as_f64_u64 (sveor_u64_x (pg, sv_as_u64_f64 (y), sign));
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_erf_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_erf_x, _ZGVsMxv_erf)
+
+PL_SIG (SV, D, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (__sv_erf, 2.13)
+PL_TEST_INTERVAL (__sv_erf, 0, 0x1p-28, 20000)
+PL_TEST_INTERVAL (__sv_erf, 0x1p-28, 1, 60000)
+PL_TEST_INTERVAL (__sv_erf, 1, 0x1p28, 60000)
+PL_TEST_INTERVAL (__sv_erf, 0x1p28, inf, 20000)
+PL_TEST_INTERVAL (__sv_erf, -0, -0x1p-28, 20000)
+PL_TEST_INTERVAL (__sv_erf, -0x1p-28, -1, 60000)
+PL_TEST_INTERVAL (__sv_erf, -1, -0x1p28, 60000)
+PL_TEST_INTERVAL (__sv_erf, -0x1p28, -inf, 20000)
+#endif
diff --git a/pl/math/sv_erfc_4u.c b/pl/math/sv_erfc_4u.c
new file mode 100644
index 0000000..076b471
--- /dev/null
+++ b/pl/math/sv_erfc_4u.c
@@ -0,0 +1,146 @@
+/*
+ * Double-precision SVE erfc(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+#include "sv_exp_tail.h"
+
+sv_f64_t __sv_exp_x (sv_f64_t, svbool_t);
+
+static NOINLINE sv_f64_t
+specialcase (sv_f64_t x, sv_f64_t y, svbool_t special)
+{
+ return sv_call_f64 (erfc, x, y, special);
+}
+
+static inline sv_u64_t
+lookup_interval_idx (const svbool_t pg, sv_f64_t abs_x)
+{
+ /* Interval index is calculated by (((abs(x) + 1)^4) >> 52) - 1023, i.e. the
+ unbiased exponent of (abs(x) + 1)^4, clamped to ERFC_NUM_INTERVALS. */
+ sv_f64_t xp1 = svadd_n_f64_x (pg, abs_x, 1);
+ xp1 = svmul_f64_x (pg, xp1, xp1);
+ xp1 = svmul_f64_x (pg, xp1, xp1);
+ sv_u64_t interval_idx
+ = svsub_n_u64_x (pg, svlsr_n_u64_x (pg, sv_as_u64_f64 (xp1), 52), 1023);
+ return svsel_u64 (svcmple_n_u64 (pg, interval_idx, ERFC_NUM_INTERVALS),
+ interval_idx, sv_u64 (ERFC_NUM_INTERVALS));
+}
+
+static inline sv_f64_t
+sv_eval_poly (const svbool_t pg, sv_f64_t z, sv_u64_t idx)
+{
+ /* Horner scheme; coefficients of polynomial idx, highest order first. */
+ sv_u64_t offset = svmul_n_u64_x (pg, idx, ERFC_POLY_ORDER + 1);
+ const double *base = &__v_erfc_data.poly[0][ERFC_POLY_ORDER];
+ sv_f64_t r = sv_lookup_f64_x (pg, base, offset);
+ for (int i = 0; i < ERFC_POLY_ORDER; i++)
+ {
+ base--;
+ r = sv_fma_f64_x (pg, z, r, sv_lookup_f64_x (pg, base, offset));
+ }
+ return r;
+}
+
+static inline sv_f64_t
+sv_eval_gauss (const svbool_t pg, sv_f64_t abs_x)
+{
+ /* Accurate evaluation of exp(-x^2). This operation is sensitive to rounding
+ errors in x^2, so we compute an estimate for the error and use a custom exp
+ helper which corrects for the calculated error estimate. */
+ sv_f64_t a2 = svmul_f64_x (pg, abs_x, abs_x);
+
+ /* Split abs_x into (a_hi + a_lo), where a_hi is the 'large' component and
+ a_lo is the 'small' component. */
+ const sv_f64_t scale = sv_f64 (0x1.0000002p27);
+ sv_f64_t a_hi = svneg_f64_x (pg, sv_fma_f64_x (pg, scale, abs_x,
+ svneg_f64_x (pg, abs_x)));
+ a_hi = sv_fma_f64_x (pg, scale, abs_x, a_hi);
+ sv_f64_t a_lo = svsub_f64_x (pg, abs_x, a_hi);
+
+ sv_f64_t a_hi_neg = svneg_f64_x (pg, a_hi);
+ sv_f64_t a_lo_neg = svneg_f64_x (pg, a_lo);
+
+ /* We can then estimate the error in abs_x^2 by computing (abs_x * abs_x) -
+ (a_hi + a_lo) * (a_hi + a_lo). */
+ sv_f64_t e2 = sv_fma_f64_x (pg, a_hi_neg, a_hi, a2);
+ e2 = sv_fma_f64_x (pg, a_hi_neg, a_lo, e2);
+ e2 = sv_fma_f64_x (pg, a_lo_neg, a_hi, e2);
+ e2 = sv_fma_f64_x (pg, a_lo_neg, a_lo, e2);
+
+ return sv_exp_tail (pg, svneg_f64_x (pg, a2), e2);
+}
+
+/* Optimized double precision vector complementary error function erfc.
+ Maximum measured error is 3.64 ULP:
+ __sv_erfc(0x1.4792573ee6cc7p+2) got 0x1.ff3f4c8e200d5p-42
+ want 0x1.ff3f4c8e200d9p-42. */
+sv_f64_t
+__sv_erfc_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_f64_t abs_x = svabs_f64_x (pg, x);
+ sv_u64_t atop = svlsr_n_u64_x (pg, sv_as_u64_f64 (abs_x), 52);
+
+ /* Outside of the 'interesting' bounds, [-6, 28], +ve goes to 0, -ve goes
+ to 2. As long as the polynomial is 0 in the boring zone, we can assemble
+ the result correctly. This is dealt with in two ways:
+
+ The 'coarse approach' is that the approximation algorithm is
+ zero-predicated on in_bounds = |x| < 32, which saves the need to do
+ coefficient lookup etc for |x| >= 32.
+
+ The coarse approach misses [-32, -6] and [28, 32], which are dealt with in
+ the polynomial and index calculation, such that the polynomial evaluates to
+ 0 in these regions. */
+ /* in_bounds is true for lanes where |x| < 32. */
+ svbool_t in_bounds = svcmplt_n_u64 (pg, atop, 0x404);
+ /* boring_zone = 2 for x < 0, 0 otherwise. */
+ sv_f64_t boring_zone
+ = sv_as_f64_u64 (svlsl_n_u64_x (pg, svlsr_n_u64_x (pg, ix, 63), 62));
+ /* Very small, nan and inf. */
+ svbool_t special_cases
+ = svcmpge_n_u64 (pg, svsub_n_u64_x (pg, atop, 0x3cd), 0x432);
+
+ /* erfc(|x|) ~= P_i(|x|-x_i)*exp(-x^2)
+
+ Where P_i is a polynomial and x_i is an offset, both defined in
+ v_erfc_data.c. i is chosen based on which interval x falls in. */
+ sv_u64_t i = lookup_interval_idx (in_bounds, abs_x);
+ sv_f64_t x_i = sv_lookup_f64_x (in_bounds, __v_erfc_data.interval_bounds, i);
+ sv_f64_t p = sv_eval_poly (in_bounds, svsub_f64_x (pg, abs_x, x_i), i);
+ /* 'copy' sign of x to p, i.e. negate p if x is negative. */
+ sv_u64_t sign = svbic_n_u64_z (in_bounds, ix, 0x7fffffffffffffff);
+ p = sv_as_f64_u64 (sveor_u64_z (in_bounds, sv_as_u64_f64 (p), sign));
+
+ sv_f64_t e = sv_eval_gauss (in_bounds, abs_x);
+
+ /* Assemble result: 2-p*e if x<0, p*e otherwise. No need to conditionally
+ select boring_zone because P[V_ERFC_NINTS-1]=0. */
+ sv_f64_t y = sv_fma_f64_x (pg, p, e, boring_zone);
+
+ if (unlikely (svptest_any (pg, special_cases)))
+ {
+ return specialcase (x, y, special_cases);
+ }
+ return y;
+}
+
+PL_ALIAS (__sv_erfc_x, _ZGVsMxv_erfc)
+
+PL_SIG (SV, D, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (__sv_erfc, 3.15)
+PL_TEST_INTERVAL (__sv_erfc, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (__sv_erfc, 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (__sv_erfc, -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (__sv_erfc, 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (__sv_erfc, -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (__sv_erfc, 0, inf, 40000)
+#endif
diff --git a/pl/math/sv_erff_1u3.c b/pl/math/sv_erff_1u3.c
new file mode 100644
index 0000000..c7a738c
--- /dev/null
+++ b/pl/math/sv_erff_1u3.c
@@ -0,0 +1,104 @@
+/*
+ * Single-precision vector erf(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define AbsMask (0x7fffffff)
+
+static NOINLINE sv_f32_t
+__sv_erff_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (erff, x, y, cmp);
+}
+
+sv_f32_t __sv_expf_x (sv_f32_t, svbool_t); /* Defined in sv_expf_2u.c. */
+
+/* Optimized single precision vector erf. Worst-case error is 1.25 ULP:
+ __sv_erff(0x1.dc59fap-1) got 0x1.9f9c88p-1
+ want 0x1.9f9c8ap-1. */
+sv_f32_t
+__sv_erff_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_u32_t ix = sv_as_u32_f32 (x);
+ sv_u32_t atop = svand_n_u32_x (pg, svlsr_n_u32_x (pg, ix, 16), 0x7fff);
+ /* Handle both inf/nan as well as small values (|x|<2^-28). */
+ svbool_t cmp
+ = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, atop, 0x3180), 0x7ff0 - 0x3180);
+
+ sv_u32_t sign = svand_n_u32_x (pg, ix, ~AbsMask);
+ /* |x| < 0.921875. */
+ svbool_t red = svaclt_n_f32 (pg, x, 0.921875f);
+ /* |x| > 4.0. */
+ svbool_t bor = svacgt_n_f32 (pg, x, 4.0f);
+
+ /* Load polynomial coefficients. */
+ sv_u32_t idx_lo = svsel (red, sv_u32 (0), sv_u32 (1));
+ sv_u32_t idx_hi = svadd_n_u32_x (pg, idx_lo, 2);
+
+ const float *base = (float *) __v_erff_data.coeffs;
+ sv_f32_t c_2_5 = svld1rq (svptrue_b32 (), base + 2);
+ sv_f32_t c_6_9 = svld1rq (svptrue_b32 (), base + 6);
+ sv_f32_t c_10_13 = svld1rq (svptrue_b32 (), base + 10);
+
+ /* Do not need to store elem 0 of __v_erff_data as it is not used. */
+ sv_f32_t p1 = svtbl (c_2_5, idx_lo);
+ sv_f32_t p2 = svtbl (c_2_5, idx_hi);
+ sv_f32_t p3 = svtbl (c_6_9, idx_lo);
+ sv_f32_t p4 = svtbl (c_6_9, idx_hi);
+ sv_f32_t p5 = svtbl (c_10_13, idx_lo);
+ sv_f32_t p6 = svtbl (c_10_13, idx_hi);
+
+ sv_f32_t a = svabs_f32_x (pg, x);
+ /* Square with merging mul - z is x^2 for reduced, |x| otherwise. */
+ sv_f32_t z = svmul_f32_m (red, a, a);
+
+ /* Evaluate polynomial on |x| or x^2. */
+ sv_f32_t r = sv_fma_f32_x (pg, z, p6, p5);
+ r = sv_fma_f32_x (pg, z, r, p4);
+ r = sv_fma_f32_x (pg, z, r, p3);
+ r = sv_fma_f32_x (pg, z, r, p2);
+ r = sv_fma_f32_x (pg, z, r, p1);
+ /* Use merging svmad for last operation - apply first coefficient if not
+ reduced, otherwise r is propagated unchanged. This is because the reduced
+ polynomial has lower order than the non-reduced. */
+ r = svmad_n_f32_m (svnot_b_z (pg, red), r, z, base[1]);
+ r = sv_fma_f32_x (pg, a, r, a);
+
+ /* y = |x| + |x| * P(x^2) if |x| < 0.921875
+ y = 1 - exp (-(|x| + |x| * P(|x|))) otherwise. */
+ sv_f32_t y = __sv_expf_x (svneg_f32_x (pg, r), pg);
+ y = svsel_f32 (red, r, svsubr_n_f32_x (pg, y, 1.0));
+
+ /* Boring domain (absolute value is required to get the sign of erf(-nan)
+ right). */
+ y = svsel_f32 (bor, sv_f32 (1.0f), svabs_f32_x (pg, y));
+
+ /* y = erf(x) if x>0, -erf(-x) otherwise. */
+ y = sv_as_f32_u32 (sveor_u32_x (pg, sv_as_u32_f32 (y), sign));
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_erff_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_erff_x, _ZGVsMxv_erff)
+
+PL_SIG (SV, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (__sv_erff, 0.76)
+PL_TEST_INTERVAL (__sv_erff, 0, 0x1p-28, 20000)
+PL_TEST_INTERVAL (__sv_erff, 0x1p-28, 1, 60000)
+PL_TEST_INTERVAL (__sv_erff, 1, 0x1p28, 60000)
+PL_TEST_INTERVAL (__sv_erff, 0x1p28, inf, 20000)
+PL_TEST_INTERVAL (__sv_erff, -0, -0x1p-28, 20000)
+PL_TEST_INTERVAL (__sv_erff, -0x1p-28, -1, 60000)
+PL_TEST_INTERVAL (__sv_erff, -1, -0x1p28, 60000)
+PL_TEST_INTERVAL (__sv_erff, -0x1p28, -inf, 20000)
+#endif
diff --git a/pl/math/sv_exp_tail.h b/pl/math/sv_exp_tail.h
new file mode 100644
index 0000000..9b739da
--- /dev/null
+++ b/pl/math/sv_exp_tail.h
@@ -0,0 +1,79 @@
+/*
+ * Double-precision SVE e^(x+tail) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef SV_EXP_TAIL_H
+#define SV_EXP_TAIL_H
+
+#include "sv_math.h"
+#if SV_SUPPORTED
+
+#include "v_exp_tail.h"
+
+#define C1 sv_f64 (C1_scal)
+#define C2 sv_f64 (C2_scal)
+#define C3 sv_f64 (C3_scal)
+#define MinusLn2hi (-Ln2hi_scal)
+#define MinusLn2lo (-Ln2lo_scal)
+
+#define N (1 << V_EXP_TAIL_TABLE_BITS)
+#define Tab __v_exp_tail_data
+#define IndexMask (N - 1)
+#define Shift sv_f64 (0x1.8p+52)
+#define Thres 704.0
+
+static inline sv_f64_t
+sv_exp_tail_special_case (svbool_t pg, sv_f64_t s, sv_f64_t y, sv_f64_t n)
+{
+ sv_f64_t absn = svabs_f64_x (pg, n);
+
+ /* 2^(n/N) may overflow, break it up into s1*s2. */
+ sv_u64_t b = svsel_u64 (svcmple_n_f64 (pg, n, 0), sv_u64 (0x6000000000000000),
+ sv_u64 (0));
+ sv_f64_t s1 = sv_as_f64_u64 (svsubr_n_u64_x (pg, b, 0x7000000000000000));
+ sv_f64_t s2 = sv_as_f64_u64 (
+ svadd_u64_x (pg, svsub_n_u64_x (pg, sv_as_u64_f64 (s), 0x3010000000000000),
+ b));
+
+ svbool_t cmp = svcmpgt_n_f64 (pg, absn, 1280.0 * N);
+ sv_f64_t r1 = svmul_f64_x (pg, s1, s1);
+ sv_f64_t r0 = svmul_f64_x (pg, sv_fma_f64_x (pg, y, s2, s2), s1);
+ return svsel_f64 (cmp, r1, r0);
+}
+
+static inline sv_f64_t
+sv_exp_tail (const svbool_t pg, sv_f64_t x, sv_f64_t xtail)
+{
+ /* Calculate exp(x + xtail). */
+ sv_f64_t z = sv_fma_n_f64_x (pg, InvLn2_scal, x, Shift);
+ sv_f64_t n = svsub_f64_x (pg, z, Shift);
+
+ sv_f64_t r = sv_fma_n_f64_x (pg, MinusLn2hi, n, x);
+ r = sv_fma_n_f64_x (pg, MinusLn2lo, n, r);
+
+ sv_u64_t u = sv_as_u64_f64 (z);
+ sv_u64_t e = svlsl_n_u64_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
+ sv_u64_t i = svand_n_u64_x (pg, u, IndexMask);
+
+ sv_f64_t y = sv_fma_f64_x (pg, C3, r, C2);
+ y = sv_fma_f64_x (pg, y, r, C1);
+ y = sv_fma_f64_x (pg, y, r, sv_f64 (1.0));
+ y = sv_fma_f64_x (pg, y, r, xtail);
+
+ /* s = 2^(n/N). */
+ u = sv_lookup_u64_x (pg, Tab, i);
+ sv_f64_t s = sv_as_f64_u64 (svadd_u64_x (pg, u, e));
+
+ svbool_t cmp = svcmpgt_n_f64 (pg, svabs_f64_x (pg, x), Thres);
+ if (unlikely (svptest_any (pg, cmp)))
+ {
+ return sv_exp_tail_special_case (pg, s, y, n);
+ }
+ return sv_fma_f64_x (pg, y, s, s);
+}
+
+#endif
+#endif
diff --git a/pl/math/sv_expf_2u.c b/pl/math/sv_expf_2u.c
new file mode 100644
index 0000000..87fbe45
--- /dev/null
+++ b/pl/math/sv_expf_2u.c
@@ -0,0 +1,156 @@
+/*
+ * Single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define C(i) __sv_expf_poly[i]
+
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+
+#if SV_EXPF_USE_FEXPA
+
+#define Shift (0x1.903f8p17f) /* 1.5*2^17 + 127. */
+#define Thres \
+ (0x1.5d5e2ap+6f) /* Roughly 87.3. For x < -Thres, the result is subnormal \
+ and not handled correctly by FEXPA. */
+
+static NOINLINE sv_f32_t
+special_case (sv_f32_t x, sv_f32_t y, svbool_t special)
+{
+ /* The special-case handler from the Neon routine does not handle subnormals
+ in a way that is compatible with FEXPA. For the FEXPA variant we just fall
+ back to scalar expf. */
+ return sv_call_f32 (expf, x, y, special);
+}
+
+#else
+
+#define Shift (0x1.8p23f) /* 1.5 * 2^23. */
+#define Thres (126.0f)
+
+/* Special-case handler adapted from Neon variant. Uses poly, n, e and scale
+ to produce the final result (normal cases included). It updates all lanes!
+ Therefore:
+ - all previous computation need to be done on all lanes indicated by input
+ pg
+ - we cannot simply apply the special case to the special-case-activated
+ lanes. Besides it is likely that this would not increase performance (no
+ scatter/gather). */
+static inline sv_f32_t
+specialcase (svbool_t pg, sv_f32_t poly, sv_f32_t n, sv_u32_t e,
+ svbool_t p_cmp1, sv_f32_t scale)
+{
+ /* s=2^(n/N) may overflow, break it up into s=s1*s2,
+ such that exp = s + s*y can be computed as s1*(s2+s2*y)
+ and s1*s1 overflows only if n>0. */
+
+ /* If n<=0 then set b to 0x82000000, 0 otherwise. */
+ svbool_t p_sign = svcmple_n_f32 (pg, n, 0.0f); /* n <= 0. */
+ sv_u32_t b
+ = svdup_n_u32_z (p_sign, 0x82000000); /* Inactive lanes set to 0. */
+
+ /* Set s1 to generate overflow depending on sign of exponent n. */
+ sv_f32_t s1
+ = sv_as_f32_u32 (svadd_n_u32_x (pg, b, 0x7f000000)); /* b + 0x7f000000. */
+ /* Offset s to avoid overflow in final result if n is below threshold. */
+ sv_f32_t s2 = sv_as_f32_u32 (
+ svsub_u32_x (pg, e, b)); /* e - b. */
+
+ /* |n| > 192 => 2^(n/N) overflows. */
+ svbool_t p_cmp2 = svacgt_n_f32 (pg, n, 192.0f);
+
+ sv_f32_t r2 = svmul_f32_x (pg, s1, s1);
+ sv_f32_t r1 = sv_fma_f32_x (pg, poly, s2, s2);
+ r1 = svmul_f32_x (pg, r1, s1);
+ sv_f32_t r0 = sv_fma_f32_x (pg, poly, scale, scale);
+
+ /* Apply condition 1 then 2.
+ Returns r2 if cond2 is true, otherwise
+ if cond1 is true then return r1, otherwise return r0. */
+ sv_f32_t r = svsel_f32 (p_cmp1, r1, r0);
+
+ return svsel_f32 (p_cmp2, r2, r);
+}
+
+#endif
+
+/* Optimised single-precision SVE exp function. By default this is an SVE port
+ of the Neon algorithm from math/. Alternatively, enable a modification of
+ that algorithm that looks up scale using SVE FEXPA instruction with
+ SV_EXPF_USE_FEXPA.
+
+ Worst-case error of the default algorithm is 1.95 ulp:
+ __sv_expf(-0x1.4cb74ap+2) got 0x1.6a022cp-8
+ want 0x1.6a023p-8.
+
+ Worst-case error when using FEXPA is 1.04 ulp:
+ __sv_expf(0x1.a8eda4p+1) got 0x1.ba74bcp+4
+ want 0x1.ba74bap+4. */
+sv_f32_t
+__sv_expf_x (sv_f32_t x, const svbool_t pg)
+{
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+
+ /* n = round(x/(ln2/N)). */
+ sv_f32_t z = sv_fma_n_f32_x (pg, InvLn2, x, sv_f32 (Shift));
+ sv_f32_t n = svsub_n_f32_x (pg, z, Shift);
+
+ /* r = x - n*ln2/N. */
+ sv_f32_t r = sv_fma_n_f32_x (pg, -Ln2hi, n, x);
+ r = sv_fma_n_f32_x (pg, -Ln2lo, n, r);
+
+/* scale = 2^(n/N). */
+#if SV_EXPF_USE_FEXPA
+ /* NaNs also need special handling with FEXPA. */
+ svbool_t is_special_case
+ = svorr_b_z (pg, svacgt_n_f32 (pg, x, Thres), svcmpne_f32 (pg, x, x));
+ sv_f32_t scale = svexpa_f32 (sv_as_u32_f32 (z));
+#else
+ sv_u32_t e = svlsl_n_u32_x (pg, sv_as_u32_f32 (z), 23);
+ svbool_t is_special_case = svacgt_n_f32 (pg, n, Thres);
+ sv_f32_t scale = sv_as_f32_u32 (svadd_n_u32_x (pg, e, 0x3f800000));
+#endif
+
+ /* poly = exp(r) - 1 ~= C4 r + C3 r^2 + C2 r^3 + C1 r^4 + C0 r^5. */
+ sv_f32_t r2 = svmul_f32_x (pg, r, r);
+ sv_f32_t p = sv_fma_n_f32_x (pg, C (0), r, sv_f32 (C (1)));
+ sv_f32_t q = sv_fma_n_f32_x (pg, C (2), r, sv_f32 (C (3)));
+ q = sv_fma_f32_x (pg, p, r2, q);
+ p = svmul_n_f32_x (pg, r, C (4));
+ sv_f32_t poly = sv_fma_f32_x (pg, q, r2, p);
+
+ if (unlikely (svptest_any (pg, is_special_case)))
+#if SV_EXPF_USE_FEXPA
+ return special_case (x, sv_fma_f32_x (pg, poly, scale, scale),
+ is_special_case);
+#else
+ return specialcase (pg, poly, n, e, is_special_case, scale);
+#endif
+
+ return sv_fma_f32_x (pg, poly, scale, scale);
+}
+
+PL_ALIAS (__sv_expf_x, _ZGVsMxv_expf)
+
+PL_SIG (SV, F, 1, exp, -9.9, 9.9)
+PL_TEST_ULP (__sv_expf, 1.46)
+PL_TEST_INTERVAL (__sv_expf, 0, 0x1p-23, 40000)
+PL_TEST_INTERVAL (__sv_expf, 0x1p-23, 1, 50000)
+PL_TEST_INTERVAL (__sv_expf, 1, 0x1p23, 50000)
+PL_TEST_INTERVAL (__sv_expf, 0x1p23, inf, 50000)
+PL_TEST_INTERVAL (__sv_expf, -0, -0x1p-23, 40000)
+PL_TEST_INTERVAL (__sv_expf, -0x1p-23, -1, 50000)
+PL_TEST_INTERVAL (__sv_expf, -1, -0x1p23, 50000)
+PL_TEST_INTERVAL (__sv_expf, -0x1p23, -inf, 50000)
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_expf_data.c b/pl/math/sv_expf_data.c
new file mode 100644
index 0000000..6875adf
--- /dev/null
+++ b/pl/math/sv_expf_data.c
@@ -0,0 +1,12 @@
+/*
+ * Coefficients for single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Coefficients copied from the polynomial in math/v_expf.c. */
+const float __sv_expf_poly[] = {0x1.0e4020p-7f, 0x1.573e2ep-5f, 0x1.555e66p-3f,
+ 0x1.fffdb6p-2f, 0x1.ffffecp-1f};
diff --git a/pl/math/sv_log10_2u5.c b/pl/math/sv_log10_2u5.c
new file mode 100644
index 0000000..884e201
--- /dev/null
+++ b/pl/math/sv_log10_2u5.c
@@ -0,0 +1,89 @@
+/*
+ * Double-precision SVE log10(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define OFF 0x3fe6900900000000
+#define N (1 << V_LOG10_TABLE_BITS)
+
+#define A(i) __v_log10_data.poly[i]
+
+static inline sv_f64_t
+specialcase (sv_f64_t x, sv_f64_t y, svbool_t special)
+{
+ return sv_call_f64 (log10, x, y, special);
+}
+
+/* SVE log10 algorithm. Maximum measured error is 2.46 ulps.
+ __sv_log10(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
+ want 0x1.fffbdf6eaa667p-6. */
+sv_f64_t
+__sv_log10_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_u64_t top = svlsr_n_u64_x (pg, ix, 48);
+
+ svbool_t is_special_case
+ = svcmpge_n_u64 (pg, svsub_n_u64_x (pg, top, 0x0010), 0x07ff0 - 0x0010);
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ sv_u64_t tmp = svsub_n_u64_x (pg, ix, OFF);
+ sv_u64_t i
+ = sv_mod_n_u64_x (pg, svlsr_n_u64_x (pg, tmp, 52 - V_LOG10_TABLE_BITS), N);
+ sv_f64_t k
+ = sv_to_f64_s64_x (pg, svasr_n_s64_x (pg, sv_as_s64_u64 (tmp), 52));
+ sv_f64_t z = sv_as_f64_u64 (
+ svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52)));
+
+ /* log10(x) = k*log10(2) + log10(c) + log10(z/c). */
+
+ sv_u64_t idx = svmul_n_u64_x (pg, i, 2);
+ sv_f64_t invc = sv_lookup_f64_x (pg, &__v_log10_data.tab[0].invc, idx);
+ sv_f64_t logc = sv_lookup_f64_x (pg, &__v_log10_data.tab[0].log10c, idx);
+
+ /* We approximate log10(z/c) with a polynomial in r:
+ r = z/c - 1 (we look up precomputed 1/c)
+ log10(z/c) ~= r*invln10 + r2*(A0 + r*A1 + ...). */
+ sv_f64_t r = sv_fma_f64_x (pg, z, invc, sv_f64 (-1.0));
+
+ /* hi = log10(c) + r*invln10 + k*log10(2). */
+ sv_f64_t w = sv_fma_n_f64_x (pg, __v_log10_data.invln10, r, logc);
+ sv_f64_t hi = sv_fma_n_f64_x (pg, __v_log10_data.log10_2, k, w);
+
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ sv_f64_t r2 = svmul_f64_x (pg, r, r);
+ sv_f64_t y = sv_fma_n_f64_x (pg, A (3), r, sv_f64 (A (2)));
+ sv_f64_t p = sv_fma_n_f64_x (pg, A (1), r, sv_f64 (A (0)));
+ y = sv_fma_n_f64_x (pg, A (4), r2, y);
+ y = sv_fma_f64_x (pg, y, r2, p);
+ y = sv_fma_f64_x (pg, y, r2, hi);
+
+ if (unlikely (svptest_any (pg, is_special_case)))
+ {
+ return specialcase (x, y, is_special_case);
+ }
+ return y;
+}
+
+PL_ALIAS (__sv_log10_x, _ZGVsMxv_log10)
+
+PL_SIG (SV, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (__sv_log10, 1.97)
+PL_TEST_INTERVAL (__sv_log10, -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (__sv_log10, 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_log10, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_log10, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_log10, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_log10, 100, inf, 50000)
+#endif
diff --git a/pl/math/sv_log10f_3u5.c b/pl/math/sv_log10f_3u5.c
new file mode 100644
index 0000000..e7b1e98
--- /dev/null
+++ b/pl/math/sv_log10f_3u5.c
@@ -0,0 +1,88 @@
+/*
+ * Single-precision SVE log10 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define SpecialCaseMin 0x00800000
+#define SpecialCaseMax 0x7f800000
+#define Offset 0x3f2aaaab /* 0.666667. */
+#define Mask 0x007fffff
+#define Ln2 0x1.62e43p-1f /* 0x3f317218. */
+#define InvLn10 0x1.bcb7b2p-2f
+
+#define P(i) __v_log10f_poly[i]
+
+static NOINLINE sv_f32_t
+special_case (sv_f32_t x, sv_f32_t y, svbool_t special)
+{
+ return sv_call_f32 (log10f, x, y, special);
+}
+
+/* Optimised implementation of SVE log10f using the same algorithm and
+ polynomial as v_log10f. Maximum error is 3.31ulps:
+ __sv_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
+ want 0x1.ffe2f4p-4. */
+sv_f32_t
+__sv_log10f_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_u32_t ix = sv_as_u32_f32 (x);
+ svbool_t special_cases
+ = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, ix, SpecialCaseMin),
+ SpecialCaseMax - SpecialCaseMin);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ ix = svsub_n_u32_x (pg, ix, Offset);
+ sv_f32_t n = sv_to_f32_s32_x (pg, svasr_n_s32_x (pg, sv_as_s32_u32 (ix),
+ 23)); /* signextend. */
+ ix = svand_n_u32_x (pg, ix, Mask);
+ ix = svadd_n_u32_x (pg, ix, Offset);
+ sv_f32_t r = svsub_n_f32_x (pg, sv_as_f32_u32 (ix), 1.0f);
+
+ /* y = log10(1+r) + n*log10(2)
+ log10(1+r) ~ r * InvLn(10) + P(r)
+ where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for
+ log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3)
+
+ P(r) = r2 * (q12 + r2 * (q34 + r2 * (q56 + r2 * q78)))
+ where q12 = P(0) + r * P(1), q34 = P(2) + r * P(3), and so on. */
+ sv_f32_t q12 = sv_fma_n_f32_x (pg, P (1), r, sv_f32 (P (0)));
+ sv_f32_t q34 = sv_fma_n_f32_x (pg, P (3), r, sv_f32 (P (2)));
+ sv_f32_t q56 = sv_fma_n_f32_x (pg, P (5), r, sv_f32 (P (4)));
+ sv_f32_t q78 = sv_fma_n_f32_x (pg, P (7), r, sv_f32 (P (6)));
+
+ sv_f32_t r2 = svmul_f32_x (pg, r, r);
+ sv_f32_t y = sv_fma_f32_x (pg, q78, r2, q56);
+ y = sv_fma_f32_x (pg, y, r2, q34);
+ y = sv_fma_f32_x (pg, y, r2, q12);
+
+ /* Using p = Log10(2)*n + r*InvLn(10) is slightly faster but less
+ accurate. */
+ sv_f32_t p = sv_fma_n_f32_x (pg, Ln2, n, r);
+ y = sv_fma_f32_x (pg, y, r2, svmul_n_f32_x (pg, p, InvLn10));
+
+ if (unlikely (svptest_any (pg, special_cases)))
+ {
+ return special_case (x, y, special_cases);
+ }
+ return y;
+}
+
+PL_ALIAS (__sv_log10f_x, _ZGVsMxv_log10f)
+
+PL_SIG (SV, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (__sv_log10f, 2.82)
+PL_TEST_INTERVAL (__sv_log10f, -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (__sv_log10f, 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_log10f, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_log10f, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_log10f, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_log10f, 100, inf, 50000)
+#endif
diff --git a/pl/math/sv_log2_3u.c b/pl/math/sv_log2_3u.c
new file mode 100644
index 0000000..a0815bb
--- /dev/null
+++ b/pl/math/sv_log2_3u.c
@@ -0,0 +1,85 @@
+/*
+ * Double-precision SVE log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define InvLn2 sv_f64 (0x1.71547652b82fep0)
+#define N (1 << V_LOG2_TABLE_BITS)
+#define OFF 0x3fe6900900000000
+#define P(i) sv_f64 (__v_log2_data.poly[i])
+
+NOINLINE static sv_f64_t
+specialcase (sv_f64_t x, sv_f64_t y, const svbool_t cmp)
+{
+ return sv_call_f64 (log2, x, y, cmp);
+}
+
+/* Double-precision SVE log2 routine. Implements the same algorithm as vector
+ log10, with coefficients and table entries scaled in extended precision.
+ The maximum observed error is 2.58 ULP:
+ __sv_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+ want 0x1.fffb34198d9ddp-5. */
+sv_f64_t
+__sv_log2_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_u64_t ix = sv_as_u64_f64 (x);
+ sv_u64_t top = svlsr_n_u64_x (pg, ix, 48);
+
+ svbool_t special
+ = svcmpge_n_u64 (pg, svsub_n_u64_x (pg, top, 0x0010), 0x7ff0 - 0x0010);
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ sv_u64_t tmp = svsub_n_u64_x (pg, ix, OFF);
+ sv_u64_t i
+ = sv_mod_n_u64_x (pg, svlsr_n_u64_x (pg, tmp, 52 - V_LOG2_TABLE_BITS), N);
+ sv_f64_t k
+ = sv_to_f64_s64_x (pg, svasr_n_s64_x (pg, sv_as_s64_u64 (tmp), 52));
+ sv_f64_t z = sv_as_f64_u64 (
+ svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52)));
+
+ sv_u64_t idx = svmul_n_u64_x (pg, i, 2);
+ sv_f64_t invc = sv_lookup_f64_x (pg, &__v_log2_data.tab[0].invc, idx);
+ sv_f64_t log2c = sv_lookup_f64_x (pg, &__v_log2_data.tab[0].log2c, idx);
+
+ /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */
+
+ sv_f64_t r = sv_fma_f64_x (pg, z, invc, sv_f64 (-1.0));
+ sv_f64_t w = sv_fma_f64_x (pg, r, InvLn2, log2c);
+
+ sv_f64_t r2 = svmul_f64_x (pg, r, r);
+ sv_f64_t p_23 = sv_fma_f64_x (pg, P (3), r, P (2));
+ sv_f64_t p_01 = sv_fma_f64_x (pg, P (1), r, P (0));
+ sv_f64_t y = sv_fma_f64_x (pg, P (4), r2, p_23);
+ y = sv_fma_f64_x (pg, y, r2, p_01);
+ y = sv_fma_f64_x (pg, y, r2, svadd_f64_x (pg, k, w));
+
+ if (unlikely (svptest_any (pg, special)))
+ {
+ return specialcase (x, y, special);
+ }
+ return y;
+}
+
+PL_ALIAS (__sv_log2_x, _ZGVsMxv_log2)
+
+PL_SIG (SV, D, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (__sv_log2, 2.09)
+PL_TEST_EXPECT_FENV_ALWAYS (__sv_log2)
+PL_TEST_INTERVAL (__sv_log2, -0.0, -0x1p126, 1000)
+PL_TEST_INTERVAL (__sv_log2, 0.0, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_log2, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_log2, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_log2, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_log2, 100, inf, 50000)
+
+#endif
diff --git a/pl/math/sv_log2f_2u5.c b/pl/math/sv_log2f_2u5.c
new file mode 100644
index 0000000..fe2ab16
--- /dev/null
+++ b/pl/math/sv_log2f_2u5.c
@@ -0,0 +1,79 @@
+/*
+ * Single-precision vector/SVE log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define P(i) __v_log2f_data.poly[i]
+
+#define Ln2 (0x1.62e43p-1f) /* 0x3f317218. */
+#define Min (0x00800000)
+#define Max (0x7f800000)
+#define Mask (0x007fffff)
+#define Off (0x3f2aaaab) /* 0.666667. */
+
+static NOINLINE sv_f32_t
+specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (log2f, x, y, cmp);
+}
+
+/* Optimised implementation of SVE log2f, using the same algorithm
+ and polynomial as Neon log2f. Maximum error is 2.48 ULPs:
+ __sv_log2f(0x1.558174p+0) got 0x1.a9be84p-2
+ want 0x1.a9be8p-2. */
+sv_f32_t
+__sv_log2f_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_u32_t u = sv_as_u32_f32 (x);
+ svbool_t special
+ = svcmpge_u32 (pg, svsub_n_u32_x (pg, u, Min), sv_u32 (Max - Min));
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = svsub_n_u32_x (pg, u, Off);
+ sv_f32_t n = sv_to_f32_s32_x (pg, svasr_n_s32_x (pg, sv_as_s32_u32 (u),
+ 23)); /* Sign-extend. */
+ u = svand_n_u32_x (pg, u, Mask);
+ u = svadd_n_u32_x (pg, u, Off);
+ sv_f32_t r = svsub_n_f32_x (pg, sv_as_f32_u32 (u), 1.0f);
+
+ /* y = log2(1+r) + n. */
+ sv_f32_t r2 = svmul_f32_x (pg, r, r);
+
+ /* Evaluate polynomial using pairwise Horner scheme. */
+ sv_f32_t p67 = sv_fma_n_f32_x (pg, P (7), r, sv_f32 (P (6)));
+ sv_f32_t p45 = sv_fma_n_f32_x (pg, P (5), r, sv_f32 (P (4)));
+ sv_f32_t p23 = sv_fma_n_f32_x (pg, P (3), r, sv_f32 (P (2)));
+ sv_f32_t p01 = sv_fma_n_f32_x (pg, P (1), r, sv_f32 (P (0)));
+ sv_f32_t y;
+ y = sv_fma_n_f32_x (pg, P (8), r2, p67);
+ y = sv_fma_f32_x (pg, y, r2, p45);
+ y = sv_fma_f32_x (pg, y, r2, p23);
+ y = sv_fma_f32_x (pg, y, r2, p01);
+ y = sv_fma_f32_x (pg, y, r, n);
+
+ if (unlikely (svptest_any (pg, special)))
+ return specialcase (x, y, special);
+ return y;
+}
+
+PL_ALIAS (__sv_log2f_x, _ZGVsMxv_log2f)
+
+PL_SIG (SV, F, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (__sv_log2f, 1.99)
+PL_TEST_EXPECT_FENV_ALWAYS (__sv_log2f)
+PL_TEST_INTERVAL (__sv_log2f, -0.0, -0x1p126, 4000)
+PL_TEST_INTERVAL (__sv_log2f, 0.0, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_log2f, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_log2f, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_log2f, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_log2f, 100, inf, 50000)
+
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_log_2u5.c b/pl/math/sv_log_2u5.c
new file mode 100644
index 0000000..7f06fd3
--- /dev/null
+++ b/pl/math/sv_log_2u5.c
@@ -0,0 +1,85 @@
+/*
+ * Double-precision SVE log(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define A(i) __sv_log_data.poly[i]
+#define Ln2 (0x1.62e42fefa39efp-1)
+#define N (1 << SV_LOG_TABLE_BITS)
+#define OFF (0x3fe6900900000000)
+
+double
+optr_aor_log_f64 (double);
+
+static NOINLINE sv_f64_t
+__sv_log_specialcase (sv_f64_t x, sv_f64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (optr_aor_log_f64, x, y, cmp); /* Scalar fallback on the cmp lanes of x; other lanes keep y. */
+}
+
+/* SVE port of Neon log algorithm from math/. Lanes flagged in cmp (zero,
+ subnormal, negative, infinite or NaN x) go to __sv_log_specialcase.
+ Maximum measured error is 2.17 ulp: __sv_log(0x1.a6129884398a3p+0)
+ got 0x1.ffffff1cca043p-2 want 0x1.ffffff1cca045p-2. */
+sv_f64_t
+__sv_log_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_u64_t ix = sv_as_u64_f64 (x); /* Bit pattern of x. */
+ sv_u64_t top = svlsr_n_u64_x (pg, ix, 48); /* Sign, exponent, top 4 mantissa bits. */
+ svbool_t cmp = svcmpge_u64 (pg, svsub_n_u64_x (pg, top, 0x0010),
+ sv_u64 (0x7ff0 - 0x0010)); /* top outside [0x0010, 0x7ff0). */
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ sv_u64_t tmp = svsub_n_u64_x (pg, ix, OFF);
+ /* Equivalent to (tmp >> (52 - SV_LOG_TABLE_BITS)) % N, since N is a power
+ of 2. */
+ sv_u64_t i
+ = svand_n_u64_x (pg, svlsr_n_u64_x (pg, tmp, (52 - SV_LOG_TABLE_BITS)),
+ N - 1);
+ sv_s64_t k
+ = svasr_n_s64_x (pg, sv_as_s64_u64 (tmp), 52); /* Arithmetic shift. */
+ sv_u64_t iz = svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
+ sv_f64_t z = sv_as_f64_u64 (iz); /* z = x * 2^-k. */
+ /* Lookup in 2 global lists (length N). */
+ sv_f64_t invc = sv_lookup_f64_x (pg, __sv_log_data.invc, i);
+ sv_f64_t logc = sv_lookup_f64_x (pg, __sv_log_data.logc, i);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ sv_f64_t r = sv_fma_f64_x (pg, z, invc, sv_f64 (-1.0)); /* r = z * invc - 1. */
+ sv_f64_t kd = sv_to_f64_s64_x (pg, k);
+ /* hi = r + log(c) + k*Ln2. */
+ sv_f64_t hi = sv_fma_n_f64_x (pg, Ln2, kd, svadd_f64_x (pg, logc, r));
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ sv_f64_t r2 = svmul_f64_x (pg, r, r);
+ sv_f64_t y = sv_fma_n_f64_x (pg, A (3), r, sv_f64 (A (2)));
+ sv_f64_t p = sv_fma_n_f64_x (pg, A (1), r, sv_f64 (A (0)));
+ y = sv_fma_n_f64_x (pg, A (4), r2, y);
+ y = sv_fma_f64_x (pg, y, r2, p);
+ y = sv_fma_f64_x (pg, y, r2, hi);
+
+ if (unlikely (svptest_any (pg, cmp))) /* Slow path only if a lane is flagged. */
+ return __sv_log_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_log_x, _ZGVsMxv_log)
+
+PL_SIG (SV, D, 1, log, 0.01, 11.1)
+PL_TEST_ULP (__sv_log, 1.68)
+PL_TEST_INTERVAL (__sv_log, -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (__sv_log, 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_log, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_log, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_log, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_log, 100, inf, 50000)
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_log_data.c b/pl/math/sv_log_data.c
new file mode 100644
index 0000000..77f9989
--- /dev/null
+++ b/pl/math/sv_log_data.c
@@ -0,0 +1,146 @@
+/*
+ * Coefficients for double-precision SVE log(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct sv_log_data __sv_log_data = {
+ /* All coefficients and table entries are copied from the Neon routine in
+ math/. See math/v_log_data.c for an explanation of the algorithm. */
+
+ .invc = {0x1.6a133d0dec120p+0, 0x1.6815f2f3e42edp+0,
+ 0x1.661e39be1ac9ep+0, 0x1.642bfa30ac371p+0,
+ 0x1.623f1d916f323p+0, 0x1.60578da220f65p+0,
+ 0x1.5e75349dea571p+0, 0x1.5c97fd387a75ap+0,
+ 0x1.5abfd2981f200p+0, 0x1.58eca051dc99cp+0,
+ 0x1.571e526d9df12p+0, 0x1.5554d555b3fcbp+0,
+ 0x1.539015e2a20cdp+0, 0x1.51d0014ee0164p+0,
+ 0x1.50148538cd9eep+0, 0x1.4e5d8f9f698a1p+0,
+ 0x1.4cab0edca66bep+0, 0x1.4afcf1a9db874p+0,
+ 0x1.495327136e16fp+0, 0x1.47ad9e84af28fp+0,
+ 0x1.460c47b39ae15p+0, 0x1.446f12b278001p+0,
+ 0x1.42d5efdd720ecp+0, 0x1.4140cfe001a0fp+0,
+ 0x1.3fafa3b421f69p+0, 0x1.3e225c9c8ece5p+0,
+ 0x1.3c98ec29a211ap+0, 0x1.3b13442a413fep+0,
+ 0x1.399156baa3c54p+0, 0x1.38131639b4cdbp+0,
+ 0x1.36987540fbf53p+0, 0x1.352166b648f61p+0,
+ 0x1.33adddb3eb575p+0, 0x1.323dcd99fc1d3p+0,
+ 0x1.30d129fefc7d2p+0, 0x1.2f67e6b72fe7dp+0,
+ 0x1.2e01f7cf8b187p+0, 0x1.2c9f518ddc86ep+0,
+ 0x1.2b3fe86e5f413p+0, 0x1.29e3b1211b25cp+0,
+ 0x1.288aa08b373cfp+0, 0x1.2734abcaa8467p+0,
+ 0x1.25e1c82459b81p+0, 0x1.2491eb1ad59c5p+0,
+ 0x1.23450a54048b5p+0, 0x1.21fb1bb09e578p+0,
+ 0x1.20b415346d8f7p+0, 0x1.1f6fed179a1acp+0,
+ 0x1.1e2e99b93c7b3p+0, 0x1.1cf011a7a882ap+0,
+ 0x1.1bb44b97dba5ap+0, 0x1.1a7b3e66cdd4fp+0,
+ 0x1.1944e11dc56cdp+0, 0x1.18112aebb1a6ep+0,
+ 0x1.16e013231b7e9p+0, 0x1.15b1913f156cfp+0,
+ 0x1.14859cdedde13p+0, 0x1.135c2dc68cfa4p+0,
+ 0x1.12353bdb01684p+0, 0x1.1110bf25b85b4p+0,
+ 0x1.0feeafd2f8577p+0, 0x1.0ecf062c51c3bp+0,
+ 0x1.0db1baa076c8bp+0, 0x1.0c96c5bb3048ep+0,
+ 0x1.0b7e20263e070p+0, 0x1.0a67c2acd0ce3p+0,
+ 0x1.0953a6391e982p+0, 0x1.0841c3caea380p+0,
+ 0x1.07321489b13eap+0, 0x1.062491aee9904p+0,
+ 0x1.05193497a7cc5p+0, 0x1.040ff6b5f5e9fp+0,
+ 0x1.0308d19aa6127p+0, 0x1.0203beedb0c67p+0,
+ 0x1.010037d38bcc2p+0, 1.0,
+ 0x1.fc06d493cca10p-1, 0x1.f81e6ac3b918fp-1,
+ 0x1.f44546ef18996p-1, 0x1.f07b10382c84bp-1,
+ 0x1.ecbf7070e59d4p-1, 0x1.e91213f715939p-1,
+ 0x1.e572a9a75f7b7p-1, 0x1.e1e0e2c530207p-1,
+ 0x1.de5c72d8a8be3p-1, 0x1.dae50fa5658ccp-1,
+ 0x1.d77a71145a2dap-1, 0x1.d41c51166623ep-1,
+ 0x1.d0ca6ba0bb29fp-1, 0x1.cd847e8e59681p-1,
+ 0x1.ca4a499693e00p-1, 0x1.c71b8e399e821p-1,
+ 0x1.c3f80faf19077p-1, 0x1.c0df92dc2b0ecp-1,
+ 0x1.bdd1de3cbb542p-1, 0x1.baceb9e1007a3p-1,
+ 0x1.b7d5ef543e55ep-1, 0x1.b4e749977d953p-1,
+ 0x1.b20295155478ep-1, 0x1.af279f8e82be2p-1,
+ 0x1.ac5638197fdf3p-1, 0x1.a98e2f102e087p-1,
+ 0x1.a6cf5606d05c1p-1, 0x1.a4197fc04d746p-1,
+ 0x1.a16c80293dc01p-1, 0x1.9ec82c4dc5bc9p-1,
+ 0x1.9c2c5a491f534p-1, 0x1.9998e1480b618p-1,
+ 0x1.970d9977c6c2dp-1, 0x1.948a5c023d212p-1,
+ 0x1.920f0303d6809p-1, 0x1.8f9b698a98b45p-1,
+ 0x1.8d2f6b81726f6p-1, 0x1.8acae5bb55badp-1,
+ 0x1.886db5d9275b8p-1, 0x1.8617ba567c13cp-1,
+ 0x1.83c8d27487800p-1, 0x1.8180de3c5dbe7p-1,
+ 0x1.7f3fbe71cdb71p-1, 0x1.7d055498071c1p-1,
+ 0x1.7ad182e54f65ap-1, 0x1.78a42c3c90125p-1,
+ 0x1.767d342f76944p-1, 0x1.745c7ef26b00ap-1,
+ 0x1.7241f15769d0fp-1, 0x1.702d70d396e41p-1,
+ 0x1.6e1ee3700cd11p-1, 0x1.6c162fc9cbe02p-1},
+
+ .logc = {-0x1.62fe995eb963ap-2, -0x1.5d5a48dad6b67p-2,
+ -0x1.57bde257d2769p-2, -0x1.52294fbf2af55p-2,
+ -0x1.4c9c7b598aa38p-2, -0x1.47174fc5ff560p-2,
+ -0x1.4199b7fa7b5cap-2, -0x1.3c239f48cfb99p-2,
+ -0x1.36b4f154d2aebp-2, -0x1.314d9a0ff32fbp-2,
+ -0x1.2bed85cca3cffp-2, -0x1.2694a11421af9p-2,
+ -0x1.2142d8d014fb2p-2, -0x1.1bf81a2c77776p-2,
+ -0x1.16b452a39c6a4p-2, -0x1.11776ffa6c67ep-2,
+ -0x1.0c416035020e0p-2, -0x1.071211aa10fdap-2,
+ -0x1.01e972e293b1bp-2, -0x1.f98ee587fd434p-3,
+ -0x1.ef5800ad716fbp-3, -0x1.e52e160484698p-3,
+ -0x1.db1104b19352ep-3, -0x1.d100ac59e0bd6p-3,
+ -0x1.c6fced287c3bdp-3, -0x1.bd05a7b317c29p-3,
+ -0x1.b31abd229164fp-3, -0x1.a93c0edadb0a3p-3,
+ -0x1.9f697ee30d7ddp-3, -0x1.95a2efa9aa40ap-3,
+ -0x1.8be843d796044p-3, -0x1.82395ecc477edp-3,
+ -0x1.7896240966422p-3, -0x1.6efe77aca8c55p-3,
+ -0x1.65723e117ec5cp-3, -0x1.5bf15c0955706p-3,
+ -0x1.527bb6c111da1p-3, -0x1.491133c939f8fp-3,
+ -0x1.3fb1b90c7fc58p-3, -0x1.365d2cc485f8dp-3,
+ -0x1.2d13758970de7p-3, -0x1.23d47a721fd47p-3,
+ -0x1.1aa0229f25ec2p-3, -0x1.117655ddebc3bp-3,
+ -0x1.0856fbf83ab6bp-3, -0x1.fe83fabbaa106p-4,
+ -0x1.ec6e8507a56cdp-4, -0x1.da6d68c7cc2eap-4,
+ -0x1.c88078462be0cp-4, -0x1.b6a786a423565p-4,
+ -0x1.a4e2676ac7f85p-4, -0x1.9330eea777e76p-4,
+ -0x1.8192f134d5ad9p-4, -0x1.70084464f0538p-4,
+ -0x1.5e90bdec5cb1fp-4, -0x1.4d2c3433c5536p-4,
+ -0x1.3bda7e219879ap-4, -0x1.2a9b732d27194p-4,
+ -0x1.196eeb2b10807p-4, -0x1.0854be8ef8a7ep-4,
+ -0x1.ee998cb277432p-5, -0x1.ccadb79919fb9p-5,
+ -0x1.aae5b1d8618b0p-5, -0x1.89413015d7442p-5,
+ -0x1.67bfe7bf158dep-5, -0x1.46618f83941bep-5,
+ -0x1.2525df1b0618ap-5, -0x1.040c8e2f77c6ap-5,
+ -0x1.c62aad39f738ap-6, -0x1.847fe3bdead9cp-6,
+ -0x1.43183683400acp-6, -0x1.01f31c4e1d544p-6,
+ -0x1.82201d1e6b69ap-7, -0x1.00dd0f3e1bfd6p-7,
+ -0x1.ff6fe1feb4e53p-9, 0.0,
+ 0x1.fe91885ec8e20p-8, 0x1.fc516f716296dp-7,
+ 0x1.7bb4dd70a015bp-6, 0x1.f84c99b34b674p-6,
+ 0x1.39f9ce4fb2d71p-5, 0x1.7756c0fd22e78p-5,
+ 0x1.b43ee82db8f3ap-5, 0x1.f0b3fced60034p-5,
+ 0x1.165bd78d4878ep-4, 0x1.3425d2715ebe6p-4,
+ 0x1.51b8bd91b7915p-4, 0x1.6f15632c76a47p-4,
+ 0x1.8c3c88ecbe503p-4, 0x1.a92ef077625dap-4,
+ 0x1.c5ed5745fa006p-4, 0x1.e27876de1c993p-4,
+ 0x1.fed104fce4cdcp-4, 0x1.0d7bd9c17d78bp-3,
+ 0x1.1b76986cef97bp-3, 0x1.295913d24f750p-3,
+ 0x1.37239fa295d17p-3, 0x1.44d68dd78714bp-3,
+ 0x1.52722ebe5d780p-3, 0x1.5ff6d12671f98p-3,
+ 0x1.6d64c2389484bp-3, 0x1.7abc4da40fddap-3,
+ 0x1.87fdbda1e8452p-3, 0x1.95295b06a5f37p-3,
+ 0x1.a23f6d34abbc5p-3, 0x1.af403a28e04f2p-3,
+ 0x1.bc2c06a85721ap-3, 0x1.c903161240163p-3,
+ 0x1.d5c5aa93287ebp-3, 0x1.e274051823fa9p-3,
+ 0x1.ef0e656300c16p-3, 0x1.fb9509f05aa2ap-3,
+ 0x1.04041821f37afp-2, 0x1.0a340a49b3029p-2,
+ 0x1.105a7918a126dp-2, 0x1.1677819812b84p-2,
+ 0x1.1c8b405b40c0ep-2, 0x1.2295d16cfa6b1p-2,
+ 0x1.28975066318a2p-2, 0x1.2e8fd855d86fcp-2,
+ 0x1.347f83d605e59p-2, 0x1.3a666d1244588p-2,
+ 0x1.4044adb6f8ec4p-2, 0x1.461a5f077558cp-2,
+ 0x1.4be799e20b9c8p-2, 0x1.51ac76a6b79dfp-2,
+ 0x1.57690d5744a45p-2, 0x1.5d1d758e45217p-2},
+
+ .poly = {-0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
+ 0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3},
+};
diff --git a/pl/math/sv_logf_3u4.c b/pl/math/sv_logf_3u4.c
new file mode 100644
index 0000000..11f0b8a
--- /dev/null
+++ b/pl/math/sv_logf_3u4.c
@@ -0,0 +1,77 @@
+/*
+ * Single-precision SVE log function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define P(i) __sv_logf_poly[i]
+
+#define Ln2 (0x1.62e43p-1f) /* 0x3f317218 */
+#define Min (0x00800000)
+#define Max (0x7f800000)
+#define Mask (0x007fffff)
+#define Off (0x3f2aaaab) /* 0.666667 */
+
+float
+optr_aor_log_f32 (float);
+
+static NOINLINE sv_f32_t
+__sv_logf_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (optr_aor_log_f32, x, y, cmp); /* Scalar fallback on the cmp lanes of x; other lanes keep y. */
+}
+
+/* Optimised implementation of SVE logf, using the same algorithm and polynomial
+ as the Neon routine in math/. Lanes flagged in cmp (zero, subnormal, negative,
+ infinite or NaN x) go to __sv_logf_specialcase. Maximum error is 3.34 ULPs:
+ __sv_logf(0x1.557298p+0) got 0x1.26edecp-2 want 0x1.26ede6p-2. */
+sv_f32_t
+__sv_logf_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_u32_t u = sv_as_u32_f32 (x); /* Bit pattern of x. */
+ svbool_t cmp /* u outside [Min, Max), tested with one wrapping subtract. */
+ = svcmpge_u32 (pg, svsub_n_u32_x (pg, u, Min), sv_u32 (Max - Min));
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = svsub_n_u32_x (pg, u, Off);
+ sv_f32_t n = sv_to_f32_s32_x (pg, svasr_n_s32_x (pg, sv_as_s32_u32 (u),
+ 23)); /* Sign-extend. */
+ u = svand_n_u32_x (pg, u, Mask);
+ u = svadd_n_u32_x (pg, u, Off);
+ sv_f32_t r = svsub_n_f32_x (pg, sv_as_f32_u32 (u), 1.0f);
+
+ /* y = log(1+r) + n*ln2. */
+ sv_f32_t r2 = svmul_f32_x (pg, r, r);
+ /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). */
+ sv_f32_t p = sv_fma_n_f32_x (pg, P (1), r, sv_f32 (P (2)));
+ sv_f32_t q = sv_fma_n_f32_x (pg, P (3), r, sv_f32 (P (4)));
+ sv_f32_t y = sv_fma_n_f32_x (pg, P (5), r, sv_f32 (P (6)));
+ p = sv_fma_n_f32_x (pg, P (0), r2, p);
+ q = sv_fma_f32_x (pg, p, r2, q);
+ y = sv_fma_f32_x (pg, q, r2, y);
+ p = sv_fma_n_f32_x (pg, Ln2, n, r); /* p is reused for n*ln2 + r. */
+ y = sv_fma_f32_x (pg, y, r2, p);
+
+ if (unlikely (svptest_any (pg, cmp))) /* Slow path only if a lane is flagged. */
+ return __sv_logf_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_logf_x, _ZGVsMxv_logf)
+
+PL_SIG (SV, F, 1, log, 0.01, 11.1)
+PL_TEST_ULP (__sv_logf, 2.85)
+PL_TEST_INTERVAL (__sv_logf, -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (__sv_logf, 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_logf, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_logf, 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (__sv_logf, 1.0, 100, 50000)
+PL_TEST_INTERVAL (__sv_logf, 100, inf, 50000)
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_logf_data.c b/pl/math/sv_logf_data.c
new file mode 100644
index 0000000..51dd7a7
--- /dev/null
+++ b/pl/math/sv_logf_data.c
@@ -0,0 +1,12 @@
+/*
+ * Coefficients for single-precision SVE log function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+const float __sv_logf_poly[] = {
+ /* Copied from coeffs for the Neon routine in math/. As used by sv_logf_3u4.c, entry 0 multiplies r^8 and entry 6 multiplies r^2. */
+ -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, -0x1.4f9934p-3f, 0x1.961348p-3f,
+ -0x1.00187cp-2f, 0x1.555d7cp-2f, -0x1.ffffc8p-2f,
+};
diff --git a/pl/math/sv_math.h b/pl/math/sv_math.h
new file mode 100644
index 0000000..5ef0ad3
--- /dev/null
+++ b/pl/math/sv_math.h
@@ -0,0 +1,245 @@
+/*
+ * Wrapper functions for SVE ACLE.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef SV_MATH_H
+#define SV_MATH_H
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code. */
+#define WANT_VMATH 1
+#endif
+#if WANT_VMATH
+
+#if WANT_SVE_MATH
+#define SV_SUPPORTED 1
+
+#include <arm_sve.h>
+#include <stdbool.h>
+
+#include "math_config.h"
+
+typedef float f32_t;
+typedef uint32_t u32_t;
+typedef int32_t s32_t;
+typedef double f64_t;
+typedef uint64_t u64_t;
+typedef int64_t s64_t;
+
+typedef svfloat64_t sv_f64_t;
+typedef svuint64_t sv_u64_t;
+typedef svint64_t sv_s64_t;
+
+typedef svfloat32_t sv_f32_t;
+typedef svuint32_t sv_u32_t;
+typedef svint32_t sv_s32_t;
+
+/* Double precision. */
+static inline sv_s64_t
+sv_s64 (s64_t x)
+{
+ return svdup_n_s64 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_u64_t
+sv_u64 (u64_t x)
+{
+ return svdup_n_u64 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_f64_t
+sv_f64 (f64_t x)
+{
+ return svdup_n_f64 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_f64_t
+sv_fma_f64_x (svbool_t pg, sv_f64_t x, sv_f64_t y, sv_f64_t z)
+{
+ return svmla_f64_x (pg, z, x, y); /* res = z + x * y; note the accumulator z is passed first to svmla. */
+}
+
+/* res = z + x * y with x scalar; svmla_n takes the scalar as its last operand. */
+static inline sv_f64_t
+sv_fma_n_f64_x (svbool_t pg, f64_t x, sv_f64_t y, sv_f64_t z)
+{
+ return svmla_n_f64_x (pg, z, y, x);
+}
+
+static inline sv_s64_t
+sv_as_s64_u64 (sv_u64_t x)
+{
+ return svreinterpret_s64_u64 (x); /* Same bits viewed as signed; no value conversion. */
+}
+
+static inline sv_u64_t
+sv_as_u64_f64 (sv_f64_t x)
+{
+ return svreinterpret_u64_f64 (x); /* Bit pattern of each double; no value conversion. */
+}
+
+static inline sv_f64_t
+sv_as_f64_u64 (sv_u64_t x)
+{
+ return svreinterpret_f64_u64 (x); /* Bits viewed as doubles; no value conversion. */
+}
+
+static inline sv_f64_t
+sv_to_f64_s64_x (svbool_t pg, sv_s64_t s)
+{
+ return svcvt_f64_x (pg, s); /* Value conversion: signed 64-bit integer lanes to double. */
+}
+
+static inline sv_f64_t /* Apply scalar f to each cmp lane of x; all other lanes keep y. */
+sv_call_f64 (f64_t (*f) (f64_t), sv_f64_t x, sv_f64_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ()); /* Only the first active lane of cmp. */
+ while (svptest_any (cmp, p))
+ {
+ f64_t elem = svclastb_n_f64 (p, 0, x); /* Extract the lane selected by p. */
+ elem = (*f) (elem);
+ sv_f64_t y2 = svdup_n_f64 (elem);
+ y = svsel_f64 (p, y2, y); /* Write the result into that lane only. */
+ p = svpnext_b64 (cmp, p); /* Advance to the next active lane of cmp. */
+ }
+ return y;
+}
+
+static inline sv_f64_t /* Apply scalar f to each cmp lane of (x1, x2); all other lanes keep y. */
+sv_call2_f64 (f64_t (*f) (f64_t, f64_t), sv_f64_t x1, sv_f64_t x2, sv_f64_t y,
+ svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ()); /* Only the first active lane of cmp. */
+ while (svptest_any (cmp, p))
+ {
+ f64_t elem1 = svclastb_n_f64 (p, 0, x1); /* Extract the lane selected by p. */
+ f64_t elem2 = svclastb_n_f64 (p, 0, x2);
+ f64_t ret = (*f) (elem1, elem2);
+ sv_f64_t y2 = svdup_n_f64 (ret);
+ y = svsel_f64 (p, y2, y); /* Write the result into that lane only. */
+ p = svpnext_b64 (cmp, p); /* Advance to the next active lane of cmp. */
+ }
+ return y;
+}
+
+/* Gather tab[idx] for each lane active in pg: load array of uint64_t into svuint64_t. */
+static inline sv_u64_t
+sv_lookup_u64_x (svbool_t pg, const u64_t *tab, sv_u64_t idx)
+{
+ return svld1_gather_u64index_u64 (pg, tab, idx);
+}
+
+/* Gather tab[idx] for each lane active in pg: load array of double into svfloat64_t. */
+static inline sv_f64_t
+sv_lookup_f64_x (svbool_t pg, const f64_t *tab, sv_u64_t idx)
+{
+ return svld1_gather_u64index_f64 (pg, tab, idx);
+}
+
+static inline sv_u64_t /* x mod y for a scalar divisor y. */
+sv_mod_n_u64_x (svbool_t pg, sv_u64_t x, u64_t y)
+{
+ sv_u64_t q = svdiv_n_u64_x (pg, x, y); /* q = x / y (integer division). */
+ return svmls_n_u64_x (pg, x, q, y); /* x - q * y. */
+}
+
+/* Single precision. */
+static inline sv_s32_t
+sv_s32 (s32_t x)
+{
+ return svdup_n_s32 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_u32_t
+sv_u32 (u32_t x)
+{
+ return svdup_n_u32 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_f32_t
+sv_f32 (f32_t x)
+{
+ return svdup_n_f32 (x); /* Broadcast x to every lane. */
+}
+
+static inline sv_f32_t
+sv_fma_f32_x (svbool_t pg, sv_f32_t x, sv_f32_t y, sv_f32_t z)
+{
+ return svmla_f32_x (pg, z, x, y); /* res = z + x * y; note the accumulator z is passed first to svmla. */
+}
+
+/* res = z + x * y with x scalar; svmla_n takes the scalar as its last operand. */
+static inline sv_f32_t
+sv_fma_n_f32_x (svbool_t pg, f32_t x, sv_f32_t y, sv_f32_t z)
+{
+ return svmla_n_f32_x (pg, z, y, x);
+}
+
+static inline sv_u32_t
+sv_as_u32_f32 (sv_f32_t x)
+{
+ return svreinterpret_u32_f32 (x); /* Bit pattern of each float; no value conversion. */
+}
+
+static inline sv_f32_t
+sv_as_f32_u32 (sv_u32_t x)
+{
+ return svreinterpret_f32_u32 (x); /* Bits viewed as floats; no value conversion. */
+}
+
+static inline sv_s32_t
+sv_as_s32_u32 (sv_u32_t x)
+{
+ return svreinterpret_s32_u32 (x); /* Same bits viewed as signed; no value conversion. */
+}
+
+static inline sv_f32_t
+sv_to_f32_s32_x (svbool_t pg, sv_s32_t s)
+{
+ return svcvt_f32_x (pg, s); /* Value conversion: signed 32-bit integer lanes to float. */
+}
+
+static inline sv_s32_t
+sv_to_s32_f32_x (svbool_t pg, sv_f32_t x)
+{
+ return svcvt_s32_f32_x (pg, x); /* Value conversion: float lanes to signed 32-bit integers. */
+}
+
+static inline sv_f32_t /* Apply scalar f to each cmp lane of x; all other lanes keep y. */
+sv_call_f32 (f32_t (*f) (f32_t), sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ()); /* Only the first active lane of cmp. */
+ while (svptest_any (cmp, p))
+ {
+ f32_t elem = svclastb_n_f32 (p, 0, x); /* Extract the lane selected by p. */
+ elem = (*f) (elem);
+ sv_f32_t y2 = svdup_n_f32 (elem);
+ y = svsel_f32 (p, y2, y); /* Write the result into that lane only. */
+ p = svpnext_b32 (cmp, p); /* Advance to the next active lane of cmp. */
+ }
+ return y;
+}
+
+static inline sv_f32_t /* Apply scalar f to each cmp lane of (x1, x2); all other lanes keep y. */
+sv_call2_f32 (f32_t (*f) (f32_t, f32_t), sv_f32_t x1, sv_f32_t x2, sv_f32_t y,
+ svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ()); /* Only the first active lane of cmp. */
+ while (svptest_any (cmp, p))
+ {
+ f32_t elem1 = svclastb_n_f32 (p, 0, x1); /* Extract the lane selected by p. */
+ f32_t elem2 = svclastb_n_f32 (p, 0, x2);
+ f32_t ret = (*f) (elem1, elem2);
+ sv_f32_t y2 = svdup_n_f32 (ret);
+ y = svsel_f32 (p, y2, y); /* Write the result into that lane only. */
+ p = svpnext_b32 (cmp, p); /* Advance to the next active lane of cmp. */
+ }
+ return y;
+}
+
+#endif
+#endif
+#endif
diff --git a/pl/math/sv_powi.c b/pl/math/sv_powi.c
new file mode 100644
index 0000000..1bb0eb3
--- /dev/null
+++ b/pl/math/sv_powi.c
@@ -0,0 +1,53 @@
+/*
+ * Double-precision SVE powi(x, n) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#if SV_SUPPORTED
+
+/* Optimized double-precision vector powi (double base, long integer power).
+ powi is developed for environments in which accuracy is of much less
+ importance than performance, hence we provide no estimate for worst-case
+ error. Raises each lane of as to the matching lane of ns, under predicate p. */
+svfloat64_t
+__sv_powi_x (svfloat64_t as, svint64_t ns, svbool_t p)
+{
+ /* Compute powi by successive squaring, right to left. */
+ svfloat64_t acc = svdup_n_f64 (1.0);
+ svbool_t want_recip = svcmplt_n_s64 (p, ns, 0); /* Lanes with a negative exponent. */
+ svuint64_t ns_abs = svreinterpret_u64_s64 (svabs_s64_x (p, ns));
+
+ /* We use a max to avoid needing to check whether any lane != 0 on each
+ iteration. */
+ uint64_t max_n = svmaxv_u64 (p, ns_abs);
+
+ svfloat64_t c = as;
+ /* Successively square c, and use merging predication (_m) to determine
+ whether or not to perform the multiplication or keep the previous
+ iteration. */
+ while (true)
+ {
+ svbool_t px = svcmpeq_n_u64 (p, svand_n_u64_x (p, ns_abs, 1ull), 1ull); /* Lanes whose current low exponent bit is set. */
+ acc = svmul_f64_m (px, acc, c);
+ max_n >>= 1; /* Remaining bits of the largest exponent. */
+ if (max_n == 0)
+ break;
+
+ ns_abs = svlsr_n_u64_x (p, ns_abs, 1);
+ c = svmul_f64_x (p, c, c);
+ }
+
+ /* Negative powers are handled by computing the abs(n) version and then
+ taking the reciprocal. */
+ if (svptest_any (want_recip, want_recip))
+ acc = svdivr_n_f64_m (want_recip, acc, 1.0); /* acc = 1.0 / acc in those lanes. */
+
+ return acc;
+}
+
+strong_alias (__sv_powi_x, _ZGVsMxvv_powk)
+
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_powif.c b/pl/math/sv_powif.c
new file mode 100644
index 0000000..d0567e3
--- /dev/null
+++ b/pl/math/sv_powif.c
@@ -0,0 +1,54 @@
+/*
+ * Single-precision SVE powi(x, n) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#if SV_SUPPORTED
+
+/* Optimized single-precision vector powi (float base, integer power).
+ powi is developed for environments in which accuracy is of much less
+ importance than performance, hence we provide no estimate for worst-case
+ error. Raises each lane of as to the matching lane of ns, under predicate p. */
+svfloat32_t
+__sv_powif_x (svfloat32_t as, svint32_t ns, svbool_t p)
+{
+ /* Compute powi by successive squaring, right to left. */
+ svfloat32_t acc = svdup_n_f32 (1.f);
+ svbool_t want_recip = svcmplt_n_s32 (p, ns, 0); /* Lanes with a negative exponent. */
+ svuint32_t ns_abs = svreinterpret_u32_s32 (svabs_s32_x (p, ns));
+
+ /* We use a max to avoid needing to check whether any lane != 0 on each
+ iteration. */
+ uint32_t max_n = svmaxv_u32 (p, ns_abs);
+
+ svfloat32_t c = as;
+ /* Successively square c, and use merging predication (_m) to determine
+ whether or not to perform the multiplication or keep the previous
+ iteration. */
+ while (true)
+ {
+ svbool_t px = svcmpeq_n_u32 (p, svand_n_u32_x (p, ns_abs, 1), 1); /* Lanes whose current low exponent bit is set. */
+ acc = svmul_f32_m (px, acc, c);
+ max_n >>= 1; /* Remaining bits of the largest exponent. */
+ if (max_n == 0)
+ break;
+
+ ns_abs = svlsr_n_u32_x (p, ns_abs, 1);
+ c = svmul_f32_x (p, c, c);
+ }
+
+ /* Negative powers are handled by computing the abs(n) version and then
+ taking the reciprocal. */
+ if (svptest_any (want_recip, want_recip))
+ acc = svdivr_n_f32_m (want_recip, acc, 1.0f); /* acc = 1.0f / acc in those lanes. */
+
+ return acc;
+}
+
+/* Note no trailing f for ZGV... name - 64-bit integer version is powk. */
+strong_alias (__sv_powif_x, _ZGVsMxvv_powi)
+
+#endif // SV_SUPPORTED
diff --git a/pl/math/sv_sin_3u.c b/pl/math/sv_sin_3u.c
new file mode 100644
index 0000000..3fee080
--- /dev/null
+++ b/pl/math/sv_sin_3u.c
@@ -0,0 +1,89 @@
+/*
+ * Double-precision SVE sin(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define InvPi (sv_f64 (0x1.45f306dc9c883p-2))
+#define HalfPi (sv_f64 (0x1.921fb54442d18p+0))
+#define InvPio2 (sv_f64 (0x1.45f306dc9c882p-1))
+#define NegPio2_1 (sv_f64 (-0x1.921fb50000000p+0))
+#define NegPio2_2 (sv_f64 (-0x1.110b460000000p-26))
+#define NegPio2_3 (sv_f64 (-0x1.1a62633145c07p-54))
+#define Shift (sv_f64 (0x1.8p52))
+#define RangeVal (sv_f64 (0x1p23))
+#define AbsMask (0x7fffffffffffffff)
+
+static NOINLINE sv_f64_t
+__sv_sin_specialcase (sv_f64_t x, sv_f64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (sin, x, y, cmp); /* Scalar sin on the cmp lanes of x; other lanes keep y. */
+}
+
+/* A fast SVE implementation of sin based on trigonometric
+ instructions (FTMAD, FTSSEL, FTSMUL).
+ Maximum observed error is 2.52 ULP:
+ __sv_sin(0x1.2d2b00df69661p+19) got 0x1.10ace8f3e786bp-40
+ want 0x1.10ace8f3e7868p-40. */
+sv_f64_t
+__sv_sin_x (sv_f64_t x, const svbool_t pg)
+{
+ sv_f64_t n, r, r2, y;
+ sv_u64_t sign;
+ svbool_t cmp;
+
+ r = sv_as_f64_u64 (svand_n_u64_x (pg, sv_as_u64_f64 (x), AbsMask)); /* r = |x|. */
+ sign = svand_n_u64_x (pg, sv_as_u64_f64 (x), ~AbsMask); /* Sign bit of x. */
+ cmp = svcmpge_u64 (pg, sv_as_u64_f64 (r), sv_as_u64_f64 (RangeVal)); /* |x| >= RangeVal, compared as bit patterns (so inf and NaN are flagged too). */
+
+ /* n = rint(|x|/(pi/2)). */
+ sv_f64_t q = sv_fma_f64_x (pg, InvPio2, r, Shift);
+ n = svsub_f64_x (pg, q, Shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */
+ r = sv_fma_f64_x (pg, NegPio2_1, n, r);
+ r = sv_fma_f64_x (pg, NegPio2_2, n, r);
+ r = sv_fma_f64_x (pg, NegPio2_3, n, r);
+
+ /* Final multiplicative factor: 1.0 or r depending on bit #0 of q. */
+ sv_f64_t f = svtssel_f64 (r, sv_as_u64_f64 (q));
+
+ /* sin(r) poly approx. */
+ r2 = svtsmul_f64 (r, sv_as_u64_f64 (q));
+ y = sv_f64 (0.0);
+ y = svtmad_f64 (y, r2, 7);
+ y = svtmad_f64 (y, r2, 6);
+ y = svtmad_f64 (y, r2, 5);
+ y = svtmad_f64 (y, r2, 4);
+ y = svtmad_f64 (y, r2, 3);
+ y = svtmad_f64 (y, r2, 2);
+ y = svtmad_f64 (y, r2, 1);
+ y = svtmad_f64 (y, r2, 0);
+
+ /* Apply factor. */
+ y = svmul_f64_x (pg, f, y);
+
+ /* sign = y^sign. */
+ y = sv_as_f64_u64 (sveor_u64_x (pg, sv_as_u64_f64 (y), sign));
+
+ /* No need to pass pg to specialcase here since cmp is a strict subset,
+ guaranteed by the cmpge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_sin_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_sin_x, _ZGVsMxv_sin)
+
+PL_SIG (SV, D, 1, sin, -3.1, 3.1)
+PL_TEST_ULP (__sv_sin, 2.03)
+PL_TEST_INTERVAL (__sv_sin, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (__sv_sin, 0x1p-4, 0x1p4, 500000)
+#endif
diff --git a/pl/math/sv_sinf_1u9.c b/pl/math/sv_sinf_1u9.c
new file mode 100644
index 0000000..9184ccd
--- /dev/null
+++ b/pl/math/sv_sinf_1u9.c
@@ -0,0 +1,84 @@
+/*
+ * Single-precision SVE sin(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+#define A3 (sv_f32 (__sv_sinf_data.coeffs[3]))
+#define A5 (sv_f32 (__sv_sinf_data.coeffs[2]))
+#define A7 (sv_f32 (__sv_sinf_data.coeffs[1]))
+#define A9 (sv_f32 (__sv_sinf_data.coeffs[0]))
+
+#define NegPi1 (sv_f32 (-0x1.921fb6p+1f))
+#define NegPi2 (sv_f32 (0x1.777a5cp-24f))
+#define NegPi3 (sv_f32 (0x1.ee59dap-49f))
+#define RangeVal (sv_f32 (0x1p20f))
+#define InvPi (sv_f32 (0x1.45f306p-2f))
+#define Shift (sv_f32 (0x1.8p+23f))
+#define AbsMask (0x7fffffff)
+
+static NOINLINE sv_f32_t
+__sv_sinf_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (sinf, x, y, cmp); /* Scalar sinf on the cmp lanes of x; other lanes keep y. */
+}
+
+/* A fast SVE implementation of sinf.
+ Maximum error: 1.89 ULPs.
+ This maximum error is achieved at multiple values in [-2^18, 2^18]
+ but one example is:
+ __sv_sinf(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1. */
+sv_f32_t
+__sv_sinf_x (sv_f32_t x, const svbool_t pg)
+{
+ sv_f32_t n, r, r2, y;
+ sv_u32_t sign, odd;
+ svbool_t cmp;
+
+ r = sv_as_f32_u32 (svand_n_u32_x (pg, sv_as_u32_f32 (x), AbsMask)); /* r = |x|. */
+ sign = svand_n_u32_x (pg, sv_as_u32_f32 (x), ~AbsMask); /* Sign bit of x. */
+ cmp = svcmpge_u32 (pg, sv_as_u32_f32 (r), sv_as_u32_f32 (RangeVal)); /* |x| >= RangeVal, compared as bit patterns (so inf and NaN are flagged too). */
+
+ /* n = rint(|x|/pi). */
+ n = sv_fma_f32_x (pg, InvPi, r, Shift);
+ odd = svlsl_n_u32_x (pg, sv_as_u32_f32 (n), 31); /* Bit 0 of n's bit pattern, moved to the sign-bit position. */
+ n = svsub_f32_x (pg, n, Shift);
+
+ /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
+ r = sv_fma_f32_x (pg, NegPi1, n, r);
+ r = sv_fma_f32_x (pg, NegPi2, n, r);
+ r = sv_fma_f32_x (pg, NegPi3, n, r);
+
+ /* sin(r) approx using a degree 9 polynomial from the Taylor series
+ expansion. Note that only the odd terms of this are non-zero. */
+ r2 = svmul_f32_x (pg, r, r);
+ y = sv_fma_f32_x (pg, A9, r2, A7);
+ y = sv_fma_f32_x (pg, y, r2, A5);
+ y = sv_fma_f32_x (pg, y, r2, A3);
+ y = sv_fma_f32_x (pg, svmul_f32_x (pg, y, r2), r, r); /* y = r + r * (y * r2). */
+
+ /* sign = y^sign^odd. */
+ y = sv_as_f32_u32 (
+ sveor_u32_x (pg, sv_as_u32_f32 (y), sveor_u32_x (pg, sign, odd)));
+
+ /* No need to pass pg to specialcase here since cmp is a strict subset,
+ guaranteed by the cmpge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_sinf_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_sinf_x, _ZGVsMxv_sinf)
+
+PL_SIG (SV, F, 1, sin, -3.1, 3.1)
+PL_TEST_ULP (__sv_sinf, 1.40)
+PL_TEST_INTERVAL (__sv_sinf, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (__sv_sinf, 0x1p-4, 0x1p4, 500000)
+#endif
diff --git a/pl/math/sv_sinf_poly_data.c b/pl/math/sv_sinf_poly_data.c
new file mode 100644
index 0000000..1e1ab5e
--- /dev/null
+++ b/pl/math/sv_sinf_poly_data.c
@@ -0,0 +1,19 @@
+/*
+ * Data used in single-precision sin(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients for approximating sin(x) in single
+ precision. These are the non-zero coefficients from the degree 9 Taylor
+ series expansion of sin, highest power first (r^9, r^7, r^5, r^3). */
+
+const struct sv_sinf_data __sv_sinf_data = {.coeffs = {
+ 0x1.5b2e76p-19f,
+ -0x1.9f42eap-13f,
+ 0x1.110df4p-7f,
+ -0x1.555548p-3f,
+ }};
diff --git a/pl/math/sv_tanf_3u5.c b/pl/math/sv_tanf_3u5.c
new file mode 100644
index 0000000..cca43bd
--- /dev/null
+++ b/pl/math/sv_tanf_3u5.c
@@ -0,0 +1,112 @@
+/*
+ * Single-precision SVE tan(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if SV_SUPPORTED
+
+/* Constants. */
+#define NegPio2_1 (sv_f32 (-0x1.921fb6p+0f))
+#define NegPio2_2 (sv_f32 (0x1.777a5cp-25f))
+#define NegPio2_3 (sv_f32 (0x1.ee59dap-50f))
+#define InvPio2 (sv_f32 (0x1.45f306p-1f))
+#define RangeVal (sv_f32 (0x1p15f))
+#define Shift (sv_f32 (0x1.8p+23f))
+
+#define poly(i) sv_f32 (__tanf_poly_data.poly_tan[i])
+
+/* Use full Estrin's scheme to evaluate the degree-5 polynomial poly(0) + poly(1)*z + ... + poly(5)*z^5. */
+static inline sv_f32_t
+eval_poly (svbool_t pg, sv_f32_t z)
+{
+ sv_f32_t z2 = svmul_f32_x (pg, z, z);
+ sv_f32_t z4 = svmul_f32_x (pg, z2, z2);
+ sv_f32_t y_10 = sv_fma_f32_x (pg, z, poly (1), poly (0)); /* Adjacent coefficients are paired first. */
+ sv_f32_t y_32 = sv_fma_f32_x (pg, z, poly (3), poly (2));
+ sv_f32_t y_54 = sv_fma_f32_x (pg, z, poly (5), poly (4));
+ sv_f32_t y_32_10 = sv_fma_f32_x (pg, z2, y_32, y_10); /* Pairs are then combined with z^2 and z^4. */
+ sv_f32_t y = sv_fma_f32_x (pg, z4, y_54, y_32_10);
+ return y;
+}
+
+static NOINLINE sv_f32_t
+__sv_tanf_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (tanf, x, y, cmp); /* Scalar tanf on the cmp lanes of x; other lanes keep y. */
+}
+
+/* Fast implementation of SVE tanf.
+ Maximum error is 3.45 ULP:
+ __sv_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1
+ want 0x1.ff9850p-1. */
+sv_f32_t
+__sv_tanf_x (sv_f32_t x, const svbool_t pg)
+{
+ /* Determine whether input is too large to perform fast reduction. */
+ svbool_t cmp = svacge_f32 (pg, x, RangeVal);
+ svbool_t pred_minuszero = svcmpeq_f32 (pg, x, sv_f32 (-0.0)); /* Also true for +0.0; either zero is returned unchanged below. */
+
+ /* n = rint(x/(pi/2)). */
+ sv_f32_t q = sv_fma_f32_x (pg, InvPio2, x, Shift);
+ sv_f32_t n = svsub_f32_x (pg, q, Shift);
+ /* n is already a signed integer, simply convert it. */
+ sv_s32_t in = sv_to_s32_f32_x (pg, n);
+ /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */
+ sv_s32_t alt = svand_s32_x (pg, in, sv_s32 (1));
+ svbool_t pred_alt = svcmpne_s32 (pg, alt, sv_s32 (0)); /* Lanes where n is odd. */
+
+ /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */
+ sv_f32_t r;
+ r = sv_fma_f32_x (pg, NegPio2_1, n, x);
+ r = sv_fma_f32_x (pg, NegPio2_2, n, r);
+ r = sv_fma_f32_x (pg, NegPio2_3, n, r);
+
+ /* If x lives in an interval, where |tan(x)|
+ - is finite, then use a polynomial approximation of the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity then use symmetries of tangent and the identity
+ tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use
+ the same polynomial approximation of tan as above. */
+
+ /* Perform additional reduction if required. */
+ sv_f32_t z = svneg_f32_m (r, pred_alt, r); /* z = -r where n is odd, else r. */
+
+ /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4]. */
+ sv_f32_t z2 = svmul_f32_x (pg, z, z);
+ sv_f32_t p = eval_poly (pg, z2);
+ sv_f32_t y = sv_fma_f32_x (pg, svmul_f32_x (pg, z, z2), p, z);
+
+ /* Transform result back, if necessary. */
+ sv_f32_t inv_y = svdiv_f32_x (pg, sv_f32 (1.0f), y);
+ y = svsel_f32 (pred_alt, inv_y, y);
+
+ /* Fast reduction does not handle the x = -0.0 case well,
+ therefore it is fixed here. */
+ y = svsel_f32 (pred_minuszero, x, y);
+
+ /* No need to pass pg to specialcase here since cmp is a subset of pg,
+ guaranteed by the predicated svacge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return __sv_tanf_specialcase (x, y, cmp);
+ return y;
+}
+
+PL_ALIAS (__sv_tanf_x, _ZGVsMxv_tanf)
+
+PL_SIG (SV, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (__sv_tanf, 2.96)
+PL_TEST_INTERVAL (__sv_tanf, -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (__sv_tanf, 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (__sv_tanf, 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (__sv_tanf, 0x1p-23, 0.7, 50000)
+PL_TEST_INTERVAL (__sv_tanf, 0.7, 1.5, 50000)
+PL_TEST_INTERVAL (__sv_tanf, 1.5, 100, 50000)
+PL_TEST_INTERVAL (__sv_tanf, 100, 0x1p17, 50000)
+PL_TEST_INTERVAL (__sv_tanf, 0x1p17, inf, 50000)
+#endif
diff --git a/pl/math/tanf_3u3.c b/pl/math/tanf_3u3.c
new file mode 100644
index 0000000..ec006dc
--- /dev/null
+++ b/pl/math/tanf_3u3.c
@@ -0,0 +1,202 @@
+/*
+ * Single-precision scalar tan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "pairwise_hornerf.h"
+
+/* Useful constants. */
+#define NegPio2_1 (-0x1.921fb6p+0f)
+#define NegPio2_2 (0x1.777a5cp-25f)
+#define NegPio2_3 (0x1.ee59dap-50f)
+/* Reduced from 0x1p20 to 0x1p17 to ensure 3.5ulps. */
+#define RangeVal (0x1p17f)
+#define InvPio2 ((0x1.45f306p-1f))
+#define Shift (0x1.8p+23f)
+#define AbsMask (0x7fffffff)
+#define Pio4 (0x1.921fb6p-1)
+/* 2PI * 2^-64. */
+#define Pio2p63 (0x1.921FB54442D18p-62)
+
+#define P(i) __tanf_poly_data.poly_tan[i]
+#define Q(i) __tanf_poly_data.poly_cotan[i]
+
+/* Evaluate the tangent polynomial: PAIRWISE_HORNER_5 over the coefficients
+ P(i), i.e. __tanf_poly_data.poly_tan, at the point z. */
+static inline float
+eval_P (float z)
+{
+ float poly = PAIRWISE_HORNER_5 (z, z * z, P);
+ return poly;
+}
+
+/* Evaluate the cotangent polynomial: PAIRWISE_HORNER_3 over the coefficients
+ Q(i), i.e. __tanf_poly_data.poly_cotan, at the point z. */
+static inline float
+eval_Q (float z)
+{
+ float poly = PAIRWISE_HORNER_3 (z, z * z, Q);
+ return poly;
+}
+
+/* Reduce the input argument x with the Cody-Waite approach, so that
+ x = r + n * pi/2, where r is in [-pi/4, pi/4] and n is a signed integer.
+ Returns r and stores n, converted to int32_t, through IN. The multiple of
+ pi/2 is removed in three fmaf steps using NegPio2_1, NegPio2_2 and
+ NegPio2_3. */
+static inline float
+reduce (float x, int32_t *in)
+{
+ /* n = rint(x/(pi/2)), obtained by adding and then subtracting Shift. */
+ float r = x;
+ float q = fmaf (InvPio2, r, Shift);
+ float n = q - Shift;
+ /* There is no rounding here, n is representable by a signed integer. */
+ *in = (int32_t) n;
+ /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */
+ r = fmaf (NegPio2_1, n, r);
+ r = fmaf (NegPio2_2, n, r);
+ r = fmaf (NegPio2_3, n, r);
+ return r;
+}
+
+/* Table with 4/PI to 192 bit precision. To avoid unaligned accesses
+ only 8 new bits are added per entry, making the table 4 times larger.
+ Consecutive entries therefore overlap by 24 bits, and entries that are 4
+ apart hold adjacent, non-overlapping 32-bit words. reduce_large reads the
+ entries at offsets 0, 4 and 8 from its starting index. */
+static const uint32_t __inv_pio4[24]
+ = {0x000000a2, 0x0000a2f9, 0x00a2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
+ 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
+ 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
+ 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041};
+
+/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
+ XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
+ Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
+ Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
+ multiply computes the exact 2.62-bit fixed-point modulo. Since the result
+ can have at most 29 leading zeros after the binary point, the double
+ precision result is accurate to 33 bits.
+
+ Steps, as coded below:
+ - bits 26-29 of XI pick the starting entry of __inv_pio4 and bits 23-25
+ give the left shift applied to the significand;
+ - the significand is XI's low 24 bits with bit 23 forced to 1;
+ - the product with arr[0] is a 32-bit multiply (only its low word is
+ kept), the products with arr[4] and arr[8] are 64-bit;
+ - the quadrant n is the accumulated value plus 2^61, shifted right by 62;
+ n << 62 is then subtracted and the remainder, read as a signed 64-bit
+ integer, is scaled by Pio2p63. */
+static inline double
+reduce_large (uint32_t xi, int *np)
+{
+ const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
+ int shift = (xi >> 23) & 7;
+ uint64_t n, res0, res1, res2;
+
+ xi = (xi & 0xffffff) | 0x800000;
+ xi <<= shift;
+
+ res0 = xi * arr[0];
+ res1 = (uint64_t) xi * arr[4];
+ res2 = (uint64_t) xi * arr[8];
+ res0 = (res2 >> 32) | (res0 << 32);
+ res0 += res1;
+
+ n = (res0 + (1ULL << 61)) >> 62;
+ res0 -= n << 62;
+ double x = (int64_t) res0;
+ *np = n;
+ return x * Pio2p63;
+}
+
+/* Top 12 bits of the float representation: the sign bit, the 8 exponent bits
+ and the 3 leading mantissa bits. The sign bit is NOT cleared here; in this
+ file top12 is only applied to positive constants, while tanf derives ia12
+ from the already masked |x|. */
+static inline uint32_t
+top12 (float x)
+{
+ return (asuint (x) >> 20);
+}
+
+/* Fast single-precision tan implementation.
+ Maximum ULP error: 3.293ulps.
+ tanf(0x1.c849eap+16) got -0x1.fe8d98p-1 want -0x1.fe8d9ep-1.
+
+ Dispatch is on ia12, the top 12 bits of |x|, so every threshold below is
+ compared at that granularity rather than on the full value:
+ - ia12 < top12 (0x1p-12f): return x (x * x is force-evaluated when
+ ia12 < top12 (0x1p-126f));
+ - ia12 < top12 (Pio4): polynomial P applied to x directly;
+ - ia12 < top12 (RangeVal): fast reduction with reduce;
+ - ia12 < 0x7f8: table-based reduction with reduce_large;
+ - otherwise (Inf or NaN): result of __math_invalidf (x). */
+float
+tanf (float x)
+{
+ /* Get top words. */
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+
+ /* Dispatch between no reduction (small numbers), fast reduction and
+ slow large numbers reduction. The reduction step determines r float
+ (|r| < pi/4) and n signed integer such that x = r + n * pi/2. */
+ int32_t n;
+ float r;
+ if (ia12 < top12 (Pio4))
+ {
+ /* Optimize small values. */
+ if (unlikely (ia12 < top12 (0x1p-12f)))
+ {
+ if (unlikely (ia12 < top12 (0x1p-126f)))
+ /* Force underflow for tiny x. */
+ force_eval_float (x * x);
+ return x;
+ }
+
+ /* tan (x) ~= x + x^3 * P(x^2). */
+ float x2 = x * x;
+ float y = eval_P (x2);
+ return fmaf (x2, x * y, x);
+ }
+ /* Similar to other trigonometric routines, fast inaccurate reduction is
+ performed for values of x from pi/4 up to RangeVal. In order to keep errors
+ below 3.5ulps, we set the value of RangeVal to 2^17. This might differ for
+ other trigonometric routines. Above this value more advanced but slower
+ reduction techniques need to be implemented to reach a similar accuracy.
+ */
+ else if (ia12 < top12 (RangeVal))
+ {
+ /* Fast inaccurate reduction. */
+ r = reduce (x, &n);
+ }
+ else if (ia12 < 0x7f8)
+ {
+ /* Slow accurate reduction, performed on |x|; the sign of x is put back
+ onto the reduced value afterwards. */
+ uint32_t sign = ix & ~AbsMask;
+ double dar = reduce_large (ia, &n);
+ float ar = (float) dar;
+ r = asfloat (asuint (ar) ^ sign);
+ }
+ else
+ {
+ /* tan(Inf or NaN) is NaN. */
+ return __math_invalidf (x);
+ }
+
+ /* If x lives in an interval where |tan(x)|
+ - is finite then use an approximation of tangent in the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity then use an approximation of cotangent in the form
+ cotan(z) ~ 1/z + z * Q(z^2), where the reciprocal can be computed early.
+ Using symmetries of tangent and the identity tan(r) = cotan(pi/2 - r),
+ we only need to change the sign of r to obtain tan(x) from cotan(r).
+ This 2-interval approach requires 2 different sets of coefficients P and
+ Q, where Q is a lower order polynomial than P. */
+
+ /* Determine if x lives in an interval where |tan(x)| grows to infinity:
+ this is the case when n is odd. */
+ uint32_t alt = (uint32_t) n & 1;
+
+ /* Perform additional reduction if required. */
+ float z = alt ? -r : r;
+
+ /* Prepare backward transformation. */
+ float z2 = r * r;
+ float offset = alt ? 1.0f / z : z;
+ float scale = alt ? z : z * z2;
+
+ /* Evaluate polynomial approximation of tan or cotan. */
+ float p = alt ? eval_Q (z2) : eval_P (z2);
+
+ /* A unified way of assembling the result on both interval types. */
+ return fmaf (scale, p, offset);
+}
+
+PL_SIG (S, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (tanf, 2.80)
+PL_TEST_INTERVAL (tanf, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000)
+PL_TEST_INTERVAL (tanf, -0x1p-127, -0x1p-14, 50000)
+PL_TEST_INTERVAL (tanf, 0x1p-14, 0.7, 50000)
+PL_TEST_INTERVAL (tanf, -0x1p-14, -0.7, 50000)
+PL_TEST_INTERVAL (tanf, 0.7, 1.5, 50000)
+PL_TEST_INTERVAL (tanf, -0.7, -1.5, 50000)
+PL_TEST_INTERVAL (tanf, 1.5, 0x1p17, 50000)
+PL_TEST_INTERVAL (tanf, -1.5, -0x1p17, 50000)
+PL_TEST_INTERVAL (tanf, 0x1p17, 0x1p54, 50000)
+PL_TEST_INTERVAL (tanf, -0x1p17, -0x1p54, 50000)
+PL_TEST_INTERVAL (tanf, 0x1p54, inf, 50000)
+PL_TEST_INTERVAL (tanf, -0x1p54, -inf, 50000)
diff --git a/pl/math/tanf_data.c b/pl/math/tanf_data.c
new file mode 100644
index 0000000..a6b9d51
--- /dev/null
+++ b/pl/math/tanf_data.c
@@ -0,0 +1,45 @@
+/*
+ * Data used in single-precision tan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct tanf_poly_data __tanf_poly_data = {
+.poly_tan = {
+/* Six coefficients, lowest order first (the leading one is close to 1/3).
+ Coefficients generated using:
+ poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]);
+ optimize relative error
+ final prec : 23 bits
+ deg : 5
+ a : 0x1p-126 ^ 2
+ b : ((pi) / 0x1p2) ^ 2
+ dirty rel error: 0x1.f7c2e4p-25
+ dirty abs error: 0x1.f7c2ecp-25. */
+0x1.55555p-2,
+0x1.11166p-3,
+0x1.b88a78p-5,
+0x1.7b5756p-6,
+0x1.4ef4cep-8,
+0x1.0e1e74p-7
+},
+.poly_cotan = {
+/* Four coefficients, lowest order first (the leading one is close to -1/3).
+ Coefficients generated using:
+ fpminimax(f(x) = (0x1p0 / tan(sqrt(x)) - 0x1p0 / sqrt(x)) / sqrt(x), deg, [|dtype ...|], [a;b])
+ optimize a single polynomial
+ optimize absolute error
+ final prec : 23 bits
+ working prec : 128 bits
+ deg : 3
+ a : 0x1p-126
+ b : (pi) / 0x1p2
+ dirty rel error : 0x1.81298cp-25
+ dirty abs error : 0x1.a8acf4p-25. */
+-0x1.55555p-2, /* -0.33333325. */
+-0x1.6c23e4p-6, /* -2.2225354e-2. */
+-0x1.12dbap-9, /* -2.0969994e-3. */
+-0x1.05a1c2p-12, /* -2.495116e-4. */
+}
+};
diff --git a/pl/math/tanh_3u.c b/pl/math/tanh_3u.c
new file mode 100644
index 0000000..46d9fb3
--- /dev/null
+++ b/pl/math/tanh_3u.c
@@ -0,0 +1,82 @@
+/*
+ * Double-precision tanh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define InvLn2 0x1.71547652b82fep0
+#define Ln2hi 0x1.62e42fefa39efp-1
+#define Ln2lo 0x1.abc9e3b39803fp-56
+#define Shift 0x1.8p52
+#define C(i) __expm1_poly[i]
+
+#define BoringBound 0x403241bf835f9d5f /* asuint64 (0x1.241bf835f9d5fp+4). */
+#define TinyBound 0x3e40000000000000 /* asuint64 (0x1p-27). */
+#define One 0x3ff0000000000000
+
+static inline double
+expm1_inline (double x)
+{
+ /* Helper routine for calculating exp(x) - 1. Copied from expm1_2u5.c, with
+ several simplifications:
+ - No special-case handling for tiny or special values.
+ - Simpler combination of p and t in final stage of the algorithm.
+ - Use shift-and-add instead of ldexp to calculate t.
+ Because of these simplifications the caller is responsible for filtering
+ tiny, huge and non-finite inputs; tanh does so before calling. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.
+ j = rint (x / ln2) is obtained by adding and subtracting Shift, and
+ j * ln2 is removed from x in two steps using Ln2hi and Ln2lo. */
+ double j = fma (InvLn2, x, Shift) - Shift;
+ int64_t i = j;
+ double f = fma (j, -Ln2hi, x);
+ f = fma (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial: p = f + f^2 * poly (f), with poly
+ evaluated by ESTRIN_10 over the coefficients C (__expm1_poly). */
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double p = fma (f2, ESTRIN_10 (f, f2, f4, f4 * f4, C), f);
+
+ /* t = 2 ^ i, built by placing i + 1023 at bit 52 of the representation.
+ NOTE(review): assumes i + 1023 stays a valid normal exponent - confirm
+ for any new caller. */
+ double t = asdouble ((uint64_t) (i + 1023) << 52);
+ /* expm1(x) = p * t + (t - 1). */
+ return fma (p, t, t - 1);
+}
+
+/* Approximation for double-precision tanh(x), using a simplified version of
+ expm1. The greatest observed error is 2.75 ULP:
+ tanh(-0x1.c143c3a44e087p-3) got -0x1.ba31ba4691ab7p-3
+ want -0x1.ba31ba4691ab4p-3.
+
+ Special cases, decided on the bit pattern of |x|:
+ - above BoringBound (0x1.241bf835f9d5fp+4): NaN goes through
+ __math_invalid, everything else (infinities included) returns 1 with
+ the sign of x;
+ - below TinyBound (0x1p-27): x is returned unchanged. */
+double
+tanh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ uint64_t sign = ix & ~AbsMask;
+
+ if (unlikely (ia > BoringBound))
+ {
+ if (ia > 0x7ff0000000000000)
+ return __math_invalid (x);
+ return asdouble (One | sign);
+ }
+
+ if (unlikely (ia < TinyBound))
+ return x;
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). With q = expm1 (2x) this is
+ q / (q + 2). */
+ double q = expm1_inline (2 * x);
+ return q / (q + 2);
+}
+
+PL_SIG (S, D, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (tanh, 2.26)
+/* Interval bounds are written as floating-point values. TinyBound and
+ BoringBound are the asuint64 bit patterns of 0x1p-27 and
+ 0x1.241bf835f9d5fp+4, so using the macros here would emit those integers
+ as the bounds instead of the values they encode. */
+PL_TEST_INTERVAL (tanh, 0, 0x1p-27, 1000)
+PL_TEST_INTERVAL (tanh, -0, -0x1p-27, 1000)
+PL_TEST_INTERVAL (tanh, 0x1p-27, 0x1.241bf835f9d5fp+4, 100000)
+PL_TEST_INTERVAL (tanh, -0x1p-27, -0x1.241bf835f9d5fp+4, 100000)
+PL_TEST_INTERVAL (tanh, 0x1.241bf835f9d5fp+4, inf, 1000)
+PL_TEST_INTERVAL (tanh, -0x1.241bf835f9d5fp+4, -inf, 1000)
diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c
new file mode 100644
index 0000000..76e54a4
--- /dev/null
+++ b/pl/math/tanhf_2u6.c
@@ -0,0 +1,91 @@
+/*
+ * Single-precision tanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define BoringBound \
+ 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \
+ negative). */
+#define AbsMask 0x7fffffff
+#define One 0x3f800000
+
+#define Shift (0x1.8p23f)
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+
+#define C(i) __expm1f_poly[i]
+
+static inline float
+expm1f_inline (float x)
+{
+ /* Helper routine for calculating exp(x) - 1.
+ Copied from expm1f_1u6.c, with several simplifications:
+ - No special-case handling for tiny or special values, instead return early
+ from the main routine.
+ - No special handling for large values:
+ - No early return for infinity.
+ - Simpler combination of p and t in final stage of algorithm.
+ - |i| < 27, so can calculate t by simpler shift-and-add, instead of
+ ldexpf (same as vector algorithm).
+ The caller must therefore filter tiny, large and non-finite inputs; tanhf
+ does so before calling. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.
+ j = rint (x / ln2) is obtained by adding and subtracting Shift, and
+ j * ln2 is removed from x in two steps using Ln2hi and Ln2lo. */
+ float j = fmaf (InvLn2, x, Shift) - Shift;
+ int32_t i = j;
+ float f = fmaf (j, -Ln2hi, x);
+ f = fmaf (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+ Uses Estrin scheme, where the main expm1f routine uses Horner. */
+ float f2 = f * f;
+ float p_01 = fmaf (f, C (1), C (0));
+ float p_23 = fmaf (f, C (3), C (2));
+ float p = fmaf (f2, p_23, p_01);
+ p = fmaf (f2 * f2, C (4), p);
+ p = fmaf (f2, p, f);
+
+ /* t = 2^i, built by placing i + 127 at bit 23 of the representation. */
+ float t = asfloat ((uint32_t) (i + 127) << 23);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return fmaf (p, t, t - 1);
+}
+
+/* Approximation for single-precision tanh(x), using a simplified version of
+ expm1f. The maximum error is 2.58 ULP:
+ tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5
+ want 0x1.f9ba08p-5.
+
+ Special cases, decided on the bit pattern of |x|:
+ - above BoringBound (0x1.205966p+3): NaN goes through __math_invalidf,
+ everything else (infinities included) returns 1 with the sign of x;
+ - below 0x34000000 (the bit pattern of 0x1p-23): x is returned
+ unchanged. */
+float
+tanhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ uint32_t sign = ix & ~AbsMask;
+
+ if (unlikely (iax > BoringBound))
+ {
+ if (iax > 0x7f800000)
+ return __math_invalidf (x);
+ return asfloat (One | sign);
+ }
+
+ if (unlikely (iax < 0x34000000))
+ return x;
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). With q = expm1f (2x) this is
+ q / (q + 2). */
+ float q = expm1f_inline (2 * x);
+ return q / (q + 2);
+}
+
+PL_SIG (S, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (tanhf, 2.09)
+PL_TEST_INTERVAL (tanhf, 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (tanhf, -0, -0x1p-23, 1000)
+PL_TEST_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (tanhf, -0x1p-23, -0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (tanhf, 0x1.205966p+3, inf, 100)
+PL_TEST_INTERVAL (tanhf, -0x1.205966p+3, -inf, 100)
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
new file mode 100644
index 0000000..e0f6ac7
--- /dev/null
+++ b/pl/math/test/mathbench_funcs.h
@@ -0,0 +1,86 @@
+// clang-format off
+/*
+ * Function entries for mathbench.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _ZSF1(fun, a, b) F(fun##f, a, b)
+#define _ZSD1(f, a, b) D(f, a, b)
+
+#ifdef __vpcs
+
+#define _ZVF1(fun, a, b) F(__s_##fun##f, a, b) VF(__v_##fun##f, a, b) VNF(__vn_##fun##f, a, b) VNF(_ZGVnN4v_##fun##f, a, b)
+#define _ZVD1(f, a, b) D(__s_##f, a, b) VD(__v_##f, a, b) VND(__vn_##f, a, b) VND(_ZGVnN2v_##f, a, b)
+
+#elif __aarch64__
+
+#define _ZVF1(fun, a, b) F(__s_##fun##f, a, b) VF(__v_##fun##f, a, b)
+#define _ZVD1(f, a, b) D(__s_##f, a, b) VD(__v_##f, a, b)
+
+#elif WANT_VMATH
+
+#define _ZVF1(fun, a, b) F(__s_##fun##f, a, b)
+#define _ZVD1(f, a, b) D(__s_##f, a, b)
+
+#else
+
+#define _ZVF1(f, a, b)
+#define _ZVD1(f, a, b)
+
+#endif
+
+#if WANT_SVE_MATH
+
+#define _ZSVF1(fun, a, b) SVF(__sv_##fun##f_x, a, b) SVF(_ZGVsMxv_##fun##f, a, b)
+#define _ZSVD1(f, a, b) SVD(__sv_##f##_x, a, b) SVD(_ZGVsMxv_##f, a, b)
+
+#else
+
+#define _ZSVF1(f, a, b)
+#define _ZSVD1(f, a, b)
+
+#endif
+
+/* No auto-generated wrappers for binary functions - they have to be
+ manually defined in mathbench_wrappers.h. We have to define silent
+ macros for them anyway as they will be emitted by PL_SIG. */
+#define _ZSF2(...)
+#define _ZSD2(...)
+#define _ZVF2(...)
+#define _ZVD2(...)
+#define _ZSVF2(...)
+#define _ZSVD2(...)
+
+#include "mathbench_funcs_gen.h"
+
+/* PL_SIG only emits entries for unary functions, since if a function
+ needs to be wrapped in mathbench there is no way for it to know the
+ name of the wrapper. Add entries for binary functions, or any other
+ exotic signatures that need wrapping, below. */
+
+{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
+{"atan2", 'd', 0, -10.0, 10.0, {.d = atan2_wrap}},
+{"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}},
+
+{"__s_atan2f", 'f', 0, -10.0, 10.0, {.f = __s_atan2f_wrap}},
+{"__s_atan2", 'd', 0, -10.0, 10.0, {.d = __s_atan2_wrap}},
+{"__v_atan2f", 'f', 'v', -10.0, 10.0, {.vf = __v_atan2f_wrap}},
+{"__v_atan2", 'd', 'v', -10.0, 10.0, {.vd = __v_atan2_wrap}},
+{"__vn_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = __vn_atan2f_wrap}},
+{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}},
+{"__vn_atan2", 'd', 'n', -10.0, 10.0, {.vnd = __vn_atan2_wrap}},
+{"_ZGVnN2vv_atan2", 'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}},
+
+#if WANT_SVE_MATH
+/* SVE entries. Each label is the name of the routine that the listed wrapper
+ calls in mathbench_wrappers.h. */
+{"__sv_atan2f_x", 'f', 's', -10.0, 10.0, {.svf = __sv_atan2f_wrap}},
+{"_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_atan2f_wrap}},
+{"__sv_atan2_x", 'd', 's', -10.0, 10.0, {.svd = __sv_atan2_wrap}},
+{"_ZGVsMxvv_atan2", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_atan2_wrap}},
+{"__sv_powif_x", 'f', 's', -10.0, 10.0, {.svf = __sv_powif_wrap}},
+{"_ZGVsMxvv_powi", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}},
+{"__sv_powi_x", 'd', 's', -10.0, 10.0, {.svd = __sv_powi_wrap}},
+{"_ZGVsMxvv_powk", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}},
+#endif
+ // clang-format on
diff --git a/pl/math/test/mathbench_wrappers.h b/pl/math/test/mathbench_wrappers.h
new file mode 100644
index 0000000..eba960e
--- /dev/null
+++ b/pl/math/test/mathbench_wrappers.h
@@ -0,0 +1,133 @@
+/*
+ * Function wrappers for mathbench.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Unary adapter for atan2: the first argument is pinned to 5.0 and the
+ benchmark input is passed as the second. */
+static double
+atan2_wrap (double x)
+{
+ const double fixed = 5.0;
+ return atan2 (fixed, x);
+}
+
+/* Unary adapter for atan2f: the first argument is pinned to 5.0f and the
+ benchmark input is passed as the second. */
+static float
+atan2f_wrap (float x)
+{
+ const float fixed = 5.0f;
+ return atan2f (fixed, x);
+}
+
+/* Unary adapter for __builtin_powi: the input is used as the base and,
+ after round and conversion to int, as the exponent. */
+static double
+powi_wrap (double x)
+{
+ int exponent = (int) round (x);
+ return __builtin_powi (x, exponent);
+}
+
+#if WANT_VMATH
+#if __aarch64__
+
+/* Unary adapter for __s_atan2: first argument pinned to 5.0, benchmark input
+ passed as the second. */
+static double
+__s_atan2_wrap (double x)
+{
+ const double fixed = 5.0;
+ return __s_atan2 (fixed, x);
+}
+
+/* Unary adapter for __s_atan2f: first argument pinned to 5.0f, benchmark
+ input passed as the second. */
+static float
+__s_atan2f_wrap (float x)
+{
+ const float fixed = 5.0f;
+ return __s_atan2f (fixed, x);
+}
+
+/* Unary adapter for __v_atan2: first argument is v_double_dup (5.0),
+ benchmark input passed as the second. */
+static v_double
+__v_atan2_wrap (v_double x)
+{
+ v_double fixed = v_double_dup (5.0);
+ return __v_atan2 (fixed, x);
+}
+
+/* Unary adapter for __v_atan2f: first argument is v_float_dup (5.0f),
+ benchmark input passed as the second. */
+static v_float
+__v_atan2f_wrap (v_float x)
+{
+ v_float fixed = v_float_dup (5.0f);
+ return __v_atan2f (fixed, x);
+}
+
+#ifdef __vpcs
+
+/* Unary adapter for __vn_atan2: first argument is v_double_dup (5.0),
+ benchmark input passed as the second. */
+__vpcs static v_double
+__vn_atan2_wrap (v_double x)
+{
+ v_double fixed = v_double_dup (5.0);
+ return __vn_atan2 (fixed, x);
+}
+
+/* Unary adapter for __vn_atan2f: first argument is v_float_dup (5.0f),
+ benchmark input passed as the second. */
+__vpcs static v_float
+__vn_atan2f_wrap (v_float x)
+{
+ v_float fixed = v_float_dup (5.0f);
+ return __vn_atan2f (fixed, x);
+}
+
+/* Unary adapter for _ZGVnN2vv_atan2: first argument is v_double_dup (5.0),
+ benchmark input passed as the second. */
+__vpcs static v_double
+_Z_atan2_wrap (v_double x)
+{
+ v_double fixed = v_double_dup (5.0);
+ return _ZGVnN2vv_atan2 (fixed, x);
+}
+
+/* Unary adapter for _ZGVnN4vv_atan2f: first argument is v_float_dup (5.0f),
+ benchmark input passed as the second. */
+__vpcs static v_float
+_Z_atan2f_wrap (v_float x)
+{
+ v_float fixed = v_float_dup (5.0f);
+ return _ZGVnN4vv_atan2f (fixed, x);
+}
+
+#endif // __vpcs
+#endif // __aarch64__
+#endif // WANT_VMATH
+
+#if WANT_SVE_MATH
+
+/* Unary adapter for __sv_atan2f_x: the benchmark input is passed first, the
+ second argument is svdup_n_f32 (5.0f), and pg is forwarded. */
+static sv_float
+__sv_atan2f_wrap (sv_float x, sv_bool pg)
+{
+ sv_float fixed = svdup_n_f32 (5.0f);
+ return __sv_atan2f_x (x, fixed, pg);
+}
+
+/* Unary adapter for _ZGVsMxvv_atan2f: the benchmark input is passed first,
+ the second argument is svdup_n_f32 (5.0f), and pg is forwarded. */
+static sv_float
+_Z_sv_atan2f_wrap (sv_float x, sv_bool pg)
+{
+ sv_float fixed = svdup_n_f32 (5.0f);
+ return _ZGVsMxvv_atan2f (x, fixed, pg);
+}
+
+/* Unary adapter for __sv_atan2_x: the benchmark input is passed first, the
+ second argument is svdup_n_f64 (5.0), and pg is forwarded. */
+static sv_double
+__sv_atan2_wrap (sv_double x, sv_bool pg)
+{
+ sv_double fixed = svdup_n_f64 (5.0);
+ return __sv_atan2_x (x, fixed, pg);
+}
+
+/* Unary adapter for _ZGVsMxvv_atan2: the benchmark input is passed first,
+ the second argument is svdup_n_f64 (5.0), and pg is forwarded. */
+static sv_double
+_Z_sv_atan2_wrap (sv_double x, sv_bool pg)
+{
+ sv_double fixed = svdup_n_f64 (5.0);
+ return _ZGVsMxvv_atan2 (x, fixed, pg);
+}
+
+/* Unary adapter for _ZGVsMxvv_powi: the input is the base and, converted
+ with svcvt_s32_f32_x under pg, also the integer exponent. */
+static sv_float
+_Z_sv_powi_wrap (sv_float x, sv_bool pg)
+{
+ svint32_t exponent = svcvt_s32_f32_x (pg, x);
+ return _ZGVsMxvv_powi (x, exponent, pg);
+}
+
+/* Unary adapter for __sv_powif_x: the input is the base and, converted with
+ svcvt_s32_f32_x under pg, also the integer exponent. */
+static sv_float
+__sv_powif_wrap (sv_float x, sv_bool pg)
+{
+ svint32_t exponent = svcvt_s32_f32_x (pg, x);
+ return __sv_powif_x (x, exponent, pg);
+}
+
+/* Unary adapter for _ZGVsMxvv_powk: the input is the base and, converted
+ with svcvt_s64_f64_x under pg, also the integer exponent. */
+static sv_double
+_Z_sv_powk_wrap (sv_double x, sv_bool pg)
+{
+ svint64_t exponent = svcvt_s64_f64_x (pg, x);
+ return _ZGVsMxvv_powk (x, exponent, pg);
+}
+
+/* Unary adapter for __sv_powi_x: the input is the base and, converted with
+ svcvt_s64_f64_x under pg, also the integer exponent. */
+static sv_double
+__sv_powi_wrap (sv_double x, sv_bool pg)
+{
+ svint64_t exponent = svcvt_s64_f64_x (pg, x);
+ return __sv_powi_x (x, exponent, pg);
+}
+
+#endif // WANT_SVE_MATH
diff --git a/pl/math/test/pl_test.h b/pl/math/test/pl_test.h
new file mode 100644
index 0000000..467d1ca
--- /dev/null
+++ b/pl/math/test/pl_test.h
@@ -0,0 +1,33 @@
+/*
+ * PL macros for emitting various details about routines for consumption by
+ * runulp.sh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Emit the max ULP threshold, l, for routine f. Piggy-back PL_TEST_EXPECT_FENV
+ on PL_TEST_ULP to add EXPECT_FENV to all scalar routines. */
+#if !(V_SUPPORTED || SV_SUPPORTED)
+#define PL_TEST_ULP(f, l) \
+ PL_TEST_EXPECT_FENV_ALWAYS (f) \
+ PL_TEST_ULP f l
+#else
+#define PL_TEST_ULP(f, l) PL_TEST_ULP f l
+#endif
+
+/* Emit aliases to allow test params to be mapped from aliases back to their
+ aliasees. */
+#define PL_ALIAS(a, b) PL_TEST_ALIAS a b
+
+/* Emit routine name if e == 1 and f is expected to correctly trigger fenv
+ exceptions. e allows declaration to be emitted conditionally upon certain
+ build flags - defer expansion by one pass to allow those flags to be expanded
+ properly. */
+#define PL_TEST_EXPECT_FENV(f, e) PL_TEST_EXPECT_FENV_ (f, e)
+#define PL_TEST_EXPECT_FENV_(f, e) PL_TEST_EXPECT_FENV_##e (f)
+#define PL_TEST_EXPECT_FENV_1(f) PL_TEST_EXPECT_FENV_ENABLED f
+#define PL_TEST_EXPECT_FENV_ALWAYS(f) PL_TEST_EXPECT_FENV (f, 1)
+
+#define PL_TEST_INTERVAL(f, lo, hi, n) PL_TEST_INTERVAL f lo hi n
+#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) PL_TEST_INTERVAL f lo hi n c
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
new file mode 100755
index 0000000..4d02530
--- /dev/null
+++ b/pl/math/test/runulp.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# ULP error check script.
+#
+# Copyright (c) 2019-2023, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+#set -x
+set -eu
+
+# cd to bin directory.
+cd "${0%/*}"
+
+flags="${ULPFLAGS:--q}"
+emu="$@"
+
+# Enable SVE testing
+WANT_SVE_MATH=${WANT_SVE_MATH:-0}
+
+FAIL=0
+PASS=0
+
+# Run ./ulp for routine $1 over the interval $2..$3 with $4 samples; an
+# optional $5 is forwarded as "-c $5". The ULP limit is looked up in $LIMITS
+# under $1, or under its aliasee when $1 is listed as an alias in $ALIASES.
+# "-f" is added when that key is not listed in $FENV. Increments PASS or FAIL.
+t() {
+ key=$(cat "$ALIASES" | { grep " $1$" || echo $1; } | awk '{print $1}')
+ L=$(cat "$LIMITS" | grep "^$key " | awk '{print $2}')
+ # A missing or malformed limit aborts the run, exactly as set -e did
+ # before, but now names the routine instead of exiting silently.
+ [[ $L =~ ^[0-9]+\.[0-9]+$ ]] || {
+ echo "ERROR: no valid ULP limit for '$1' (key '$key', got '$L')" >&2
+ exit 1
+ }
+ extra_flags=""
+ [[ -z "${5:-}" ]] || extra_flags="$extra_flags -c $5"
+ grep -q "^$key$" "$FENV" || extra_flags="$extra_flags -f"
+ # $emu, $flags and $extra_flags are left unquoted on purpose: each may hold
+ # several words.
+ $emu ./ulp -e $L $flags ${extra_flags} $1 $2 $3 $4 && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+}
+
+check() {
+ $emu ./ulp -f -q "$@" #>/dev/null
+}
+
+# Regression-test for correct NaN handling in atan2
+check atan2 0x1p-1022 0x1p-1000 x 0 0x1p-1022 40000
+check atan2 0x1.7887a0a717aefp+1017 0x1.7887a0a717aefp+1017 x -nan -nan
+check atan2 nan nan x -nan -nan
+
+# vector functions
+flags="${ULPFLAGS:--q}"
+runs=
+check __s_log10f 1 && runs=1
+runv=
+check __v_log10f 1 && runv=1
+runvn=
+check __vn_log10f 1 && runvn=1
+runsv=
+if [ $WANT_SVE_MATH -eq 1 ]; then
+check __sv_cosf 0 && runsv=1
+check __sv_cos 0 && runsv=1
+check __sv_sinf 0 && runsv=1
+check __sv_sin 0 && runsv=1
+# No guarantees about powi accuracy, so regression-test for exactness
+# w.r.t. the custom reference impl in ulp_wrappers.h
+check -q -f -e 0 __sv_powif 0 inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 __sv_powif -0 -inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 __sv_powif 0 inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 __sv_powif -0 -inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 __sv_powi 0 inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 __sv_powi -0 -inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 __sv_powi 0 inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 __sv_powi -0 -inf x -0 -1000 100000 && runsv=1
+fi
+
+while read F LO HI N C
+do
+ t $F $LO $HI $N $C
+done << EOF
+$(cat $INTERVALS)
+EOF
+
+[ 0 -eq $FAIL ] || {
+ echo "FAILED $FAIL PASSED $PASS"
+ exit 1
+}
diff --git a/pl/math/test/testcases/directed/acosh.tst b/pl/math/test/testcases/directed/acosh.tst
new file mode 100644
index 0000000..dd962bd
--- /dev/null
+++ b/pl/math/test/testcases/directed/acosh.tst
@@ -0,0 +1,19 @@
+; acosh.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=acosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=acosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=acosh op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=acosh op1=3fefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=00000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=80000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bfefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=7fe01ac0.7f03a83e result=40862e50.541778f1.8cc error=0
diff --git a/pl/math/test/testcases/directed/acoshf.tst b/pl/math/test/testcases/directed/acoshf.tst
new file mode 100644
index 0000000..606c615
--- /dev/null
+++ b/pl/math/test/testcases/directed/acoshf.tst
@@ -0,0 +1,19 @@
+; acoshf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acoshf op1=7fc00001 result=7fc00001 errno=0
+func=acoshf op1=ffc00001 result=7fc00001 errno=0
+func=acoshf op1=7f800001 result=7fc00001 errno=0 status=i
+func=acoshf op1=ff800001 result=7fc00001 errno=0 status=i
+func=acoshf op1=7f800000 result=7f800000 errno=0
+func=acoshf op1=3f800000 result=00000000 errno=0
+func=acoshf op1=3f7fffff result=7fc00001 errno=EDOM status=i
+func=acoshf op1=00000000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=80000000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf7fffff result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf800000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf800001 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=7f767efe result=42b2c19d.83e error=0
diff --git a/pl/math/test/testcases/directed/asinh.tst b/pl/math/test/testcases/directed/asinh.tst
new file mode 100644
index 0000000..1485dfe
--- /dev/null
+++ b/pl/math/test/testcases/directed/asinh.tst
@@ -0,0 +1,18 @@
+; asinh.tst
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=asinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=asinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=asinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=asinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=asinh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=asinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/asinhf.tst b/pl/math/test/testcases/directed/asinhf.tst
new file mode 100644
index 0000000..eb76a58
--- /dev/null
+++ b/pl/math/test/testcases/directed/asinhf.tst
@@ -0,0 +1,18 @@
+; asinhf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinhf op1=7fc00001 result=7fc00001 errno=0
+func=asinhf op1=ffc00001 result=7fc00001 errno=0
+func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=7f800000 result=7f800000 errno=0
+func=asinhf op1=ff800000 result=ff800000 errno=0
+func=asinhf op1=00000000 result=00000000 errno=0
+func=asinhf op1=80000000 result=80000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/atan.tst b/pl/math/test/testcases/directed/atan.tst
new file mode 100644
index 0000000..4c67055
--- /dev/null
+++ b/pl/math/test/testcases/directed/atan.tst
@@ -0,0 +1,22 @@
+; atan.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan op1=00000000.00000000 result=00000000.00000000 errno=0
+func=atan op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
+
+func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0
diff --git a/pl/math/test/testcases/directed/atan2.tst b/pl/math/test/testcases/directed/atan2.tst
new file mode 100644
index 0000000..647b376
--- /dev/null
+++ b/pl/math/test/testcases/directed/atan2.tst
@@ -0,0 +1,110 @@
+; atan2.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=fff00000.00000000 result=4002d97c.7f3321d2.34f errno=0
+func=atan2 op1=7ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=bff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff00000.00000000 op2=7ff00000.00000000 result=bfe921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=fff00000.00000000 result=c002d97c.7f3321d2.34f errno=0
+func=atan2 op1=fff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=3ff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=bff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=00000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=00000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=00000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=00000000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=00000000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=80000000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=3ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=bff00000.00000000 result=400921fb.54442d18.469 errno=0
+; No exception is raised on certain machines (different versions of glibc).
+; The same issue is seen with other functions that are close to x for x near 0,
+; possibly because some implementations are trivial there and involve no flop.
+func=atan2 op1=00000000.00000001 op2=3ff00000.00000000 result=00000000.00000001 errno=0 maybestatus=ux
+func=atan2 op1=80000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=80000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=80000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=80000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=80000000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=80000000.00000000 op2=00000000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=80000000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=80000000.00000000 op2=3ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=bff00000.00000000 result=c00921fb.54442d18.469 errno=0
+; No exception is raised on certain machines (different versions of glibc).
+; The same issue is seen with other functions that are close to x for x near 0,
+; possibly because some implementations are trivial there and involve no flop.
+func=atan2 op1=80000000.00000001 op2=3ff00000.00000000 result=80000000.00000001 errno=0 maybestatus=ux
+func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=3ff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=3ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=3ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=3ff00000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=bff00000.00000000 result=4002d97c.7f3321d2.34f errno=0
+func=atan2 op1=bff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=bff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=bff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=bff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=bff00000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=bff00000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=3ff00000.00000000 result=bfe921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=bff00000.00000000 result=c002d97c.7f3321d2.34f errno=0
+func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18 errno=0
diff --git a/pl/math/test/testcases/directed/atan2f.tst b/pl/math/test/testcases/directed/atan2f.tst
new file mode 100644
index 0000000..85c5c5d
--- /dev/null
+++ b/pl/math/test/testcases/directed/atan2f.tst
@@ -0,0 +1,121 @@
+; atan2f.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan2f op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=3f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=bf800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=3f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=bf800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=7f800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=ff800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=00000000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=80000000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=3f800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=bf800000 result=7fc00001 errno=0
+func=atan2f op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ffc00001 op2=7fc00001 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=7f800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=ff800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=00000000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=80000000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=3f800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=bf800000 result=ffc00001 errno=0
+func=atan2f op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=7f800000 op2=ffc00001 result=7fc00001 errno=0
+func=atan2f op1=7f800000 op2=7f800000 result=3f490fda.a22 errno=0
+func=atan2f op1=7f800000 op2=ff800000 result=4016cbe3.f99 errno=0
+func=atan2f op1=7f800000 op2=00000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=80000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=3f800000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=bf800000 result=3fc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=ff800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=ff800000 op2=7f800000 result=bf490fda.a22 errno=0
+func=atan2f op1=ff800000 op2=ff800000 result=c016cbe3.f99 errno=0
+func=atan2f op1=ff800000 op2=00000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=80000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=3f800000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=bf800000 result=bfc90fda.a22 errno=0
+func=atan2f op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=00000000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=00000000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=00000000 op2=7f800000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=ff800000 result=40490fda.a22 errno=0
+func=atan2f op1=00000000 op2=00000000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=80000000 result=40490fda.a22 errno=0
+func=atan2f op1=00000000 op2=3f800000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=bf800000 result=40490fda.a22 errno=0
+; No exception is raised on certain machines (different versions of glibc).
+; The same issue is seen with other functions that are close to x for x near 0,
+; possibly because some implementations are trivial there and involve no flop.
+func=atan2f op1=00000001 op2=3f800000 result=00000001 errno=0 maybestatus=ux
+
+func=atan2f op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=80000000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=80000000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=80000000 op2=7f800000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=ff800000 result=c0490fda.a22 errno=0
+func=atan2f op1=80000000 op2=00000000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=80000000 result=c0490fda.a22 errno=0
+func=atan2f op1=80000000 op2=3f800000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=bf800000 result=c0490fda.a22 errno=0
+; No exception is raised on certain machines (different versions of glibc).
+; The same issue is seen with other functions that are close to x for x near 0,
+; possibly because some implementations are trivial there and involve no flop.
+func=atan2f op1=80000001 op2=3f800000 result=80000001 errno=0 maybestatus=ux
+
+func=atan2f op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=3f800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=3f800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=3f800000 op2=7f800000 result=00000000 errno=0
+func=atan2f op1=3f800000 op2=ff800000 result=40490fda.a22 errno=0
+func=atan2f op1=3f800000 op2=00000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=3f800000 op2=80000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=3f800000 op2=3f800000 result=3f490fda.a22 errno=0
+func=atan2f op1=3f800000 op2=bf800000 result=4016cbe3.f99 errno=0
+func=atan2f op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=bf800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=bf800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=bf800000 op2=7f800000 result=80000000 errno=0
+func=atan2f op1=bf800000 op2=ff800000 result=c0490fda.a22 errno=0
+func=atan2f op1=bf800000 op2=00000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=bf800000 op2=80000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=bf800000 op2=3f800000 result=bf490fda.a22 errno=0
+func=atan2f op1=bf800000 op2=bf800000 result=c016cbe3.f99 errno=0
+func=atan2f op1=8005f16d op2=002bb601 result=be0a60a5.d88 error=0
+func=atan2f op1=80818ec8 op2=80ba5db9 result=c0222eda.f42 error=0
+
+func=atan2f op1=ff7fffff op2=ff7fffff result=c016cbe3.f99 errno=0
+func=atan2f op1=bfc00001 op2=7f7fffff result=80300000.700 errno=0 status=u
+func=atan2f op1=80800001 op2=40000000 result=80400000.800 errno=0 status=u
diff --git a/pl/math/test/testcases/directed/atanf.tst b/pl/math/test/testcases/directed/atanf.tst
new file mode 100644
index 0000000..0a0bfc2
--- /dev/null
+++ b/pl/math/test/testcases/directed/atanf.tst
@@ -0,0 +1,22 @@
+; atanf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanf op1=7fc00001 result=7fc00001 errno=0
+func=atanf op1=ffc00001 result=7fc00001 errno=0
+func=atanf op1=7f800001 result=7fc00001 errno=0 status=i
+func=atanf op1=ff800001 result=7fc00001 errno=0 status=i
+func=atanf op1=7f800000 result=3fc90fda.a22 errno=0
+func=atanf op1=ff800000 result=bfc90fda.a22 errno=0
+func=atanf op1=00000000 result=00000000 errno=0
+func=atanf op1=80000000 result=80000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=atanf op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=atanf op1=3f800000 result=3f490fda.a22 errno=0
+func=atanf op1=bf800000 result=bf490fda.a22 errno=0
diff --git a/pl/math/test/testcases/directed/atanh.tst b/pl/math/test/testcases/directed/atanh.tst
new file mode 100644
index 0000000..d96ff32
--- /dev/null
+++ b/pl/math/test/testcases/directed/atanh.tst
@@ -0,0 +1,22 @@
+; atanh.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atanh op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=3ff00000.00000000 result=7ff00000.00000000 errno=ERANGE status=z
+func=atanh op1=bff00000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=atanh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=atanh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=atanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/atanhf.tst b/pl/math/test/testcases/directed/atanhf.tst
new file mode 100644
index 0000000..21a68a6
--- /dev/null
+++ b/pl/math/test/testcases/directed/atanhf.tst
@@ -0,0 +1,23 @@
+; atanhf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanhf op1=7fc00001 result=7fc00001 errno=0
+func=atanhf op1=ffc00001 result=7fc00001 errno=0
+func=atanhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=atanhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=atanhf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=3f800001 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=bf800001 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=3f800000 result=7f800000 errno=ERANGE status=z
+func=atanhf op1=bf800000 result=ff800000 errno=ERANGE status=z
+func=atanhf op1=00000000 result=00000000 errno=0
+func=atanhf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=atanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/cbrtf.tst b/pl/math/test/testcases/directed/cbrtf.tst
new file mode 100644
index 0000000..0dd8d09
--- /dev/null
+++ b/pl/math/test/testcases/directed/cbrtf.tst
@@ -0,0 +1,29 @@
+; cbrtf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=cbrtf op1=7f800000 result=7f800000 errno=0
+func=cbrtf op1=ff800000 result=ff800000 errno=0
+func=cbrtf op1=7f800001 result=7fc00001 errno=0 status=i
+func=cbrtf op1=7fc00001 result=7fc00001 errno=0
+func=cbrtf op1=00000000 result=00000000 errno=0
+func=cbrtf op1=00000001 result=26a14517.cc7 errno=0
+func=cbrtf op1=00000002 result=26cb2ff5.29f errno=0
+func=cbrtf op1=00000003 result=26e89768.579 errno=0
+func=cbrtf op1=00000004 result=27000000.000 errno=0
+func=cbrtf op1=00400000 result=2a4b2ff5.29f errno=0
+func=cbrtf op1=00800000 result=2a800000.000 errno=0
+func=cbrtf op1=3f800000 result=3f800000.000 errno=0
+func=cbrtf op1=40000000 result=3fa14517.cc7 errno=0
+func=cbrtf op1=7f7fffff result=54cb2ff4.e63 errno=0
+func=cbrtf op1=80000000 result=80000000 errno=0
+func=cbrtf op1=80000001 result=a6a14517.cc7 errno=0
+func=cbrtf op1=80000002 result=a6cb2ff5.29f errno=0
+func=cbrtf op1=80000003 result=a6e89768.579 errno=0
+func=cbrtf op1=80000004 result=a7000000.000 errno=0
+func=cbrtf op1=80400000 result=aa4b2ff5.29f errno=0
+func=cbrtf op1=80800000 result=aa800000.000 errno=0
+func=cbrtf op1=bf800000 result=bf800000.000 errno=0
+func=cbrtf op1=c0000000 result=bfa14517.cc7 errno=0
+func=cbrtf op1=ff7fffff result=d4cb2ff4.e63 errno=0
diff --git a/pl/math/test/testcases/directed/cosh.tst b/pl/math/test/testcases/directed/cosh.tst
new file mode 100644
index 0000000..c4efacb
--- /dev/null
+++ b/pl/math/test/testcases/directed/cosh.tst
@@ -0,0 +1,15 @@
+; cosh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=cosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=cosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=cosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=cosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=cosh op1=fff00000.00000000 result=7ff00000.00000000 errno=0
+func=cosh op1=ffefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=cosh op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=cosh op1=80000000.00000000 result=3ff00000.00000000 errno=0
diff --git a/pl/math/test/testcases/directed/coshf.tst b/pl/math/test/testcases/directed/coshf.tst
new file mode 100644
index 0000000..2b967e7
--- /dev/null
+++ b/pl/math/test/testcases/directed/coshf.tst
@@ -0,0 +1,15 @@
+; coshf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=coshf op1=7fc00001 result=7fc00001 errno=0
+func=coshf op1=ffc00001 result=7fc00001 errno=0
+func=coshf op1=7f800001 result=7fc00001 errno=0 status=i
+func=coshf op1=ff800001 result=7fc00001 errno=0 status=i
+func=coshf op1=7f800000 result=7f800000 errno=0
+func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=coshf op1=ff800000 result=7f800000 errno=0
+func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox
+func=coshf op1=00000000 result=3f800000 errno=0
+func=coshf op1=80000000 result=3f800000 errno=0
diff --git a/pl/math/test/testcases/directed/erfc.tst b/pl/math/test/testcases/directed/erfc.tst
new file mode 100644
index 0000000..c03fc59
--- /dev/null
+++ b/pl/math/test/testcases/directed/erfc.tst
@@ -0,0 +1,23 @@
+; erfc.tst - Directed test cases for erfc
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=erfc op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=erfc op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erfc op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erfc op1=7ff00000.00000000 result=00000000.00000000 errno=0
+func=erfc op1=7fefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+; We deliberately turned off errno setting in erf, as the standard simply
+; states that errno `may` be set to ERANGE in case of underflow.
+; As a result the following condition on errno cannot be satisfied.
+;
+; func=erfc op1=403b44af.48b01531 result=00000000.00000000 errno=ERANGE status=ux
+;
+func=erfc op1=c03b44af.48b01531 result=40000000.00000000 errno=0
+func=erfc op1=403bffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+func=erfc op1=c03bffff.ffffffff result=40000000.00000000 errno=0
+func=erfc op1=fff00000.00000000 result=40000000.00000000 errno=0
+func=erfc op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=erfc op1=80000000.00000000 result=3ff00000.00000000 errno=0
diff --git a/pl/math/test/testcases/directed/erfcf.tst b/pl/math/test/testcases/directed/erfcf.tst
new file mode 100644
index 0000000..719bacc
--- /dev/null
+++ b/pl/math/test/testcases/directed/erfcf.tst
@@ -0,0 +1,14 @@
+; erfcf.tst - Directed test cases for erfcf
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erfcf op1=7fc00001 result=7fc00001 errno=0
+func=erfcf op1=ffc00001 result=7fc00001 errno=0
+func=erfcf op1=7f800001 result=7fc00001 errno=0 status=i
+func=erfcf op1=ff800001 result=7fc00001 errno=0 status=i
+func=erfcf op1=7f800000 result=00000000 errno=0
+func=erfcf op1=7f7fffff result=00000000 errno=ERANGE status=ux
+func=erfcf op1=ff800000 result=40000000 errno=0
+func=erfcf op1=00000000 result=3f800000 errno=0
+func=erfcf op1=80000000 result=3f800000 errno=0
diff --git a/pl/math/test/testcases/directed/erff.tst b/pl/math/test/testcases/directed/erff.tst
new file mode 100644
index 0000000..9b1d3d5
--- /dev/null
+++ b/pl/math/test/testcases/directed/erff.tst
@@ -0,0 +1,17 @@
+; erff.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erff op1=7fc00001 result=7fc00001 errno=0
+func=erff op1=ffc00001 result=7fc00001 errno=0
+func=erff op1=7f800001 result=7fc00001 errno=0 status=i
+func=erff op1=ff800001 result=7fc00001 errno=0 status=i
+func=erff op1=7f800000 result=3f800000 errno=0
+func=erff op1=ff800000 result=bf800000 errno=0
+func=erff op1=00000000 result=00000000 errno=ERANGE
+func=erff op1=80000000 result=80000000 errno=ERANGE
+func=erff op1=00000001 result=00000001 errno=0 status=ux
+func=erff op1=80000001 result=80000001 errno=0 status=ux
+func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
+func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
diff --git a/pl/math/test/testcases/directed/expm1.tst b/pl/math/test/testcases/directed/expm1.tst
new file mode 100644
index 0000000..609d6f4
--- /dev/null
+++ b/pl/math/test/testcases/directed/expm1.tst
@@ -0,0 +1,21 @@
+; expm1.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=expm1 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=expm1 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=expm1 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=expm1 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=expm1 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=expm1 op1=fff00000.00000000 result=bff00000.00000000 errno=0
+func=expm1 op1=ffefffff.ffffffff result=bff00000.00000000 errno=0
+func=expm1 op1=00000000.00000000 result=00000000.00000000 errno=0
+func=expm1 op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=expm1 op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=expm1 op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/expm1f.tst b/pl/math/test/testcases/directed/expm1f.tst
new file mode 100644
index 0000000..44c3842
--- /dev/null
+++ b/pl/math/test/testcases/directed/expm1f.tst
@@ -0,0 +1,57 @@
+; expm1f.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=expm1f op1=7fc00001 result=7fc00001 errno=0
+func=expm1f op1=ffc00001 result=7fc00001 errno=0
+func=expm1f op1=7f800001 result=7fc00001 errno=0 status=i
+func=expm1f op1=ff800001 result=7fc00001 errno=0 status=i
+func=expm1f op1=7f800000 result=7f800000 errno=0
+func=expm1f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=expm1f op1=ff800000 result=bf800000 errno=0
+func=expm1f op1=ff7fffff result=bf800000 errno=0
+func=expm1f op1=00000000 result=00000000 errno=0
+func=expm1f op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+
+func=expm1f op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=expm1f op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=expm1f op1=42b145c0 result=7f6ac2dd.9b8 errno=0
+
+; Check both sides of the over/underflow thresholds in the code.
+func=expm1f op1=c2000000 result=bf7fffff.fff error=0
+func=expm1f op1=c2000001 result=bf7fffff.fff error=0
+func=expm1f op1=43000000 result=7f800000 error=overflow
+func=expm1f op1=43000001 result=7f800000 error=overflow
+func=expm1f op1=c2a80000 result=bf800000.000 error=0
+func=expm1f op1=c2a80001 result=bf800000.000 error=0
+
+; Check values for which exp goes denormal. expm1f should not report
+; spurious overflow.
+func=expm1f op1=c2b00f34 result=bf800000.000 error=0
+func=expm1f op1=c2ce8ed0 result=bf800000.000 error=0
+func=expm1f op1=c2dc6bba result=bf800000.000 error=0
+
+; Regression tests for significance loss when the two components of
+; the result have opposite sign but similar magnitude
+func=expm1f op1=be8516c1 result=be6a652b.0dc error=0
+func=expm1f op1=be851714 result=be6a65ab.0e5 error=0
+func=expm1f op1=be851cc7 result=be6a6e75.111 error=0
+func=expm1f op1=be851d1a result=be6a6ef5.102 error=0
+func=expm1f op1=be851d6d result=be6a6f75.0f2 error=0
+func=expm1f op1=be852065 result=be6a7409.0e4 error=0
+func=expm1f op1=be8520b8 result=be6a7489.0c7 error=0
+func=expm1f op1=be85210b result=be6a7509.0a8 error=0
+func=expm1f op1=be855401 result=be6ac39b.0d5 error=0
+func=expm1f op1=be933307 result=be7fdbf0.d8d error=0
+func=expm1f op1=be92ed6b result=be7f737a.d81 error=0
+func=expm1f op1=be933b90 result=be7fe8be.d76 error=0
+func=expm1f op1=3eb11364 result=3ed38deb.0c0 error=0
+func=expm1f op1=3f28e830 result=3f6f344b.0da error=0
+func=expm1f op1=3eb1578f result=3ed3ee47.13b error=0
+func=expm1f op1=3f50176a result=3fa08e36.fea error=0
diff --git a/pl/math/test/testcases/directed/log10.tst b/pl/math/test/testcases/directed/log10.tst
new file mode 100644
index 0000000..3483143
--- /dev/null
+++ b/pl/math/test/testcases/directed/log10.tst
@@ -0,0 +1,16 @@
+; log10.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log10 op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log10 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log10 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log10 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log10 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
diff --git a/pl/math/test/testcases/directed/log10f.tst b/pl/math/test/testcases/directed/log10f.tst
new file mode 100644
index 0000000..d5744a6
--- /dev/null
+++ b/pl/math/test/testcases/directed/log10f.tst
@@ -0,0 +1,69 @@
+; log10f.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log10f op1=7fc00001 result=7fc00001 errno=0
+func=log10f op1=ffc00001 result=7fc00001 errno=0
+func=log10f op1=7f800001 result=7fc00001 errno=0 status=i
+func=log10f op1=ff800001 result=7fc00001 errno=0 status=i
+func=log10f op1=ff810000 result=7fc00001 errno=0 status=i
+func=log10f op1=7f800000 result=7f800000 errno=0
+func=log10f op1=3f800000 result=00000000 errno=0
+func=log10f op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=log10f op1=00000000 result=ff800000 errno=ERANGE status=z
+func=log10f op1=80000000 result=ff800000 errno=ERANGE status=z
+func=log10f op1=80000001 result=7fc00001 errno=EDOM status=i
+
+; Directed tests for the special-case handling of log10 of things
+; very near 1
+func=log10f op1=3f81a618 result=3bb62472.b92 error=0
+func=log10f op1=3f876783 result=3cc811f4.26c error=0
+func=log10f op1=3f816af8 result=3b9cc4c7.057 error=0
+func=log10f op1=3f7bed7d result=bbe432cb.e23 error=0
+func=log10f op1=3f803ece result=3a59ff3a.a84 error=0
+func=log10f op1=3f80089f result=38ef9728.aa6 error=0
+func=log10f op1=3f86ab72 result=3cb4b711.457 error=0
+func=log10f op1=3f780854 result=bc60f953.904 error=0
+func=log10f op1=3f7c6d76 result=bbc7fd01.01c error=0
+func=log10f op1=3f85dff6 result=3c9fa76f.81f error=0
+func=log10f op1=3f7b87f4 result=bbfa9edc.be4 error=0
+func=log10f op1=3f81c710 result=3bc4457b.745 error=0
+func=log10f op1=3f80946d result=3b00a140.c06 error=0
+func=log10f op1=3f7e87ea result=bb23cd70.828 error=0
+func=log10f op1=3f811437 result=3b6ee960.b40 error=0
+func=log10f op1=3f858dcf result=3c971d9b.2ea error=0
+func=log10f op1=3f7f61a3 result=ba89b814.4e0 error=0
+func=log10f op1=3f82d642 result=3c1bfb8d.517 error=0
+func=log10f op1=3f80f3bc result=3b52ebe8.c75 error=0
+func=log10f op1=3f85eff9 result=3ca150d9.7e8 error=0
+func=log10f op1=3f843eb8 result=3c68263f.771 error=0
+func=log10f op1=3f78e691 result=bc481cf4.50a error=0
+func=log10f op1=3f87c56f result=3cd1b268.5e6 error=0
+func=log10f op1=3f83b711 result=3c4b94c5.918 error=0
+func=log10f op1=3f823b2b result=3bf5eb02.e2a error=0
+func=log10f op1=3f7f2c4e result=bab82c80.519 error=0
+func=log10f op1=3f83fc92 result=3c5a3ba1.543 error=0
+func=log10f op1=3f793956 result=bc3ee04e.03c error=0
+func=log10f op1=3f839ba5 result=3c45caca.92a error=0
+func=log10f op1=3f862f30 result=3ca7de76.16f error=0
+func=log10f op1=3f832a20 result=3c2dc6e9.afd error=0
+func=log10f op1=3f810296 result=3b5fb92a.429 error=0
+func=log10f op1=3f7e58c9 result=bb38655a.0a4 error=0
+func=log10f op1=3f8362e7 result=3c39cc65.d15 error=0
+func=log10f op1=3f7fdb85 result=b97d9016.40b error=0
+func=log10f op1=3f84484e result=3c6a29f2.f74 error=0
+func=log10f op1=3f861862 result=3ca5819e.f2d error=0
+func=log10f op1=3f7c027b result=bbdf912d.440 error=0
+func=log10f op1=3f867803 result=3caf6744.34d error=0
+func=log10f op1=3f789a89 result=bc509bce.458 error=0
+func=log10f op1=3f8361d9 result=3c399347.379 error=0
+func=log10f op1=3f7d3ac3 result=bb9ad93a.93d error=0
+func=log10f op1=3f7ee241 result=baf8bd12.a62 error=0
+func=log10f op1=3f83a1fd result=3c4721bd.0a4 error=0
+func=log10f op1=3f840da3 result=3c5dd375.675 error=0
+func=log10f op1=3f79c2fe result=bc2f8a60.8c5 error=0
+func=log10f op1=3f854a93 result=3c901cc9.add error=0
+func=log10f op1=3f87a50a result=3cce6125.cd6 error=0
+func=log10f op1=3f818bf5 result=3baaee68.a55 error=0
+func=log10f op1=3f830a44 result=3c2705c4.d87 error=0
diff --git a/pl/math/test/testcases/directed/log1p.tst b/pl/math/test/testcases/directed/log1p.tst
new file mode 100644
index 0000000..9ee8c62
--- /dev/null
+++ b/pl/math/test/testcases/directed/log1p.tst
@@ -0,0 +1,22 @@
+; log1p.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+; Cases 6, 9, 10, 11, 12 fail with certain versions of GLIBC and not others.
+; The main reason seems to be the handling of errno and exceptions.
+
+func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0
+func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/log1pf.tst b/pl/math/test/testcases/directed/log1pf.tst
new file mode 100644
index 0000000..aaa01d6
--- /dev/null
+++ b/pl/math/test/testcases/directed/log1pf.tst
@@ -0,0 +1,130 @@
+; log1pf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log1pf op1=7fc00001 result=7fc00001 errno=0
+func=log1pf op1=ffc00001 result=7fc00001 errno=0
+func=log1pf op1=7f800001 result=7fc00001 errno=0 status=i
+func=log1pf op1=ff800001 result=7fc00001 errno=0 status=i
+func=log1pf op1=ff810000 result=7fc00001 errno=0 status=i
+func=log1pf op1=7f800000 result=7f800000 errno=0
+
+; Cases 6, 9, 10, 11, 12 fail with certain versions of GLIBC and not others.
+; The main reason seems to be the handling of errno and exceptions.
+
+func=log1pf op1=00000000 result=00000000 errno=0
+func=log1pf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=log1pf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=log1pf op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=log1pf op1=3f1e91ee result=3ef6d127.fdb errno=0
+func=log1pf op1=3f201046 result=3ef8a881.fba errno=0
+func=log1pf op1=3f21b916 result=3efab23b.f9f errno=0
+func=log1pf op1=3f21bde6 result=3efab821.fee errno=0
+func=log1pf op1=3f22a5ee result=3efbd435.ff2 errno=0
+func=log1pf op1=3f231b56 result=3efc63b7.e26 errno=0
+func=log1pf op1=3f23ce96 result=3efd3e83.fc8 errno=0
+func=log1pf op1=3eee18c6 result=3ec38576.02e errno=0
+func=log1pf op1=3eee2f41 result=3ec394ce.057 errno=0
+func=log1pf op1=3eee770d result=3ec3c5cc.00c errno=0
+func=log1pf op1=3eee7fed result=3ec3cbda.065 errno=0
+func=log1pf op1=3eee8fb2 result=3ec3d69c.008 errno=0
+func=log1pf op1=3eeeb8eb result=3ec3f2ba.061 errno=0
+func=log1pf op1=3eeeccfd result=3ec4006a.01d errno=0
+func=log1pf op1=3eeef5f0 result=3ec41c56.020 errno=0
+func=log1pf op1=3eeeff12 result=3ec42290.00c errno=0
+func=log1pf op1=3eef05cf result=3ec42728.052 errno=0
+func=log1pf op1=3eef13d3 result=3ec430b6.00e errno=0
+func=log1pf op1=3eef2e70 result=3ec442da.04a errno=0
+func=log1pf op1=3eef3fbf result=3ec44ea6.055 errno=0
+func=log1pf op1=3eef3feb result=3ec44ec4.021 errno=0
+func=log1pf op1=3eef4399 result=3ec45146.011 errno=0
+func=log1pf op1=3eef452e result=3ec4525a.049 errno=0
+func=log1pf op1=3eef4ea9 result=3ec458d0.020 errno=0
+func=log1pf op1=3eef7365 result=3ec471d8.05e errno=0
+func=log1pf op1=3eefa38f result=3ec492a8.003 errno=0
+func=log1pf op1=3eefb1f1 result=3ec49c74.015 errno=0
+func=log1pf op1=3eefb334 result=3ec49d50.023 errno=0
+func=log1pf op1=3eefb3c1 result=3ec49db0.0bf errno=0
+func=log1pf op1=3eefb591 result=3ec49eec.15d errno=0
+func=log1pf op1=3eefd736 result=3ec4b5d6.02d errno=0
+func=log1pf op1=3eefd797 result=3ec4b618.114 errno=0
+func=log1pf op1=3eefee5d result=3ec4c59a.071 errno=0
+func=log1pf op1=3eeffff4 result=3ec4d194.0a7 errno=0
+func=log1pf op1=3ef00cd1 result=3ec4da56.025 errno=0
+func=log1pf op1=3ef0163a result=3ec4e0be.07a errno=0
+func=log1pf op1=3ef01e89 result=3ec4e666.007 errno=0
+func=log1pf op1=3ef02004 result=3ec4e768.00a errno=0
+func=log1pf op1=3ef02c40 result=3ec4efbc.017 errno=0
+func=log1pf op1=3ef05b50 result=3ec50fc4.031 errno=0
+func=log1pf op1=3ef05bb1 result=3ec51006.05f errno=0
+func=log1pf op1=3ef0651b result=3ec5166e.0d9 errno=0
+func=log1pf op1=3ef06609 result=3ec51710.02a errno=0
+func=log1pf op1=3ef0666a result=3ec51752.049 errno=0
+func=log1pf op1=3ef0791e result=3ec5240c.0a8 errno=0
+func=log1pf op1=3ef07d46 result=3ec526e0.00e errno=0
+func=log1pf op1=3ef091fd result=3ec534f8.03c errno=0
+func=log1pf op1=3ef09602 result=3ec537b4.128 errno=0
+func=log1pf op1=3ef09848 result=3ec53940.044 errno=0
+func=log1pf op1=3ef0a04f result=3ec53eb6.07d errno=0
+func=log1pf op1=3ef0ab6a result=3ec54644.062 errno=0
+func=log1pf op1=3ef0ae49 result=3ec54838.002 errno=0
+func=log1pf op1=3ef0c1b8 result=3ec55570.000 errno=0
+func=log1pf op1=3ef0ca06 result=3ec55b16.00d errno=0
+func=log1pf op1=3ef0cc29 result=3ec55c8a.095 errno=0
+func=log1pf op1=3ef0d228 result=3ec5609e.04f errno=0
+func=log1pf op1=3ef0d8c0 result=3ec5651a.05e errno=0
+func=log1pf op1=3ef0dc0c result=3ec56758.029 errno=0
+func=log1pf op1=3ef0e0e8 result=3ec56aa6.02e errno=0
+func=log1pf op1=3ef0e502 result=3ec56d70.102 errno=0
+func=log1pf op1=3ef0e754 result=3ec56f04.017 errno=0
+func=log1pf op1=3ef0efe9 result=3ec574da.01c errno=0
+func=log1pf op1=3ef0f309 result=3ec576fa.016 errno=0
+func=log1pf op1=3ef0f499 result=3ec5780a.005 errno=0
+func=log1pf op1=3ef0f6c2 result=3ec57982.083 errno=0
+func=log1pf op1=3ef0f852 result=3ec57a92.05d errno=0
+func=log1pf op1=3ef0f9e2 result=3ec57ba2.02e errno=0
+func=log1pf op1=3ef119ee result=3ec5916c.024 errno=0
+func=log1pf op1=3ef11edf result=3ec594c8.03d errno=0
+func=log1pf op1=3ef128c4 result=3ec59b82.001 errno=0
+func=log1pf op1=3ef12ac1 result=3ec59cdc.04b errno=0
+func=log1pf op1=3ef12fea result=3ec5a05e.045 errno=0
+func=log1pf op1=3ef131e7 result=3ec5a1b8.05a errno=0
+func=log1pf op1=3ef134e1 result=3ec5a3be.00e errno=0
+func=log1pf op1=3ef1397a result=3ec5a6de.127 errno=0
+func=log1pf op1=3ef13ade result=3ec5a7d0.0f6 errno=0
+func=log1pf op1=3ef13c0d result=3ec5a89e.054 errno=0
+func=log1pf op1=3ef13d71 result=3ec5a990.016 errno=0
+func=log1pf op1=3ef14074 result=3ec5ab9c.12c errno=0
+func=log1pf op1=3ef146a0 result=3ec5afce.035 errno=0
+func=log1pf op1=3ef14a39 result=3ec5b240.024 errno=0
+func=log1pf op1=3ef14d39 result=3ec5b44a.00c errno=0
+func=log1pf op1=3ef152a3 result=3ec5b7f8.04d errno=0
+func=log1pf op1=3ef170a1 result=3ec5cc5a.021 errno=0
+func=log1pf op1=3ef17855 result=3ec5d196.0dc errno=0
+func=log1pf op1=3ef17ece result=3ec5d5fc.010 errno=0
+func=log1pf op1=3ef1810c result=3ec5d782.08e errno=0
+func=log1pf op1=3ef18da9 result=3ec5e014.0ae errno=0
+func=log1pf op1=3ef19054 result=3ec5e1e4.1a2 errno=0
+func=log1pf op1=3ef190ea result=3ec5e24a.048 errno=0
+func=log1pf op1=3ef1a739 result=3ec5f172.0d8 errno=0
+func=log1pf op1=3ef1a83c result=3ec5f222.018 errno=0
+func=log1pf op1=3ef1bbcc result=3ec5ff6c.09d errno=0
+func=log1pf op1=3ef1bd3c result=3ec60066.03a errno=0
+func=log1pf op1=3ef1d6ee result=3ec611da.056 errno=0
+func=log1pf op1=3ef1de36 result=3ec616cc.01b errno=0
+func=log1pf op1=3ef1e623 result=3ec61c2e.008 errno=0
+func=log1pf op1=3ef1e9b1 result=3ec61e98.029 errno=0
+func=log1pf op1=3ef1ee19 result=3ec62196.0d8 errno=0
+func=log1pf op1=3ef1f13a result=3ec623b6.039 errno=0
+func=log1pf op1=3ef1f1a7 result=3ec62400.091 errno=0
+func=log1pf op1=3ef1f214 result=3ec6244a.0e8 errno=0
+func=log1pf op1=3ef206e1 result=3ec6326a.09b errno=0
+func=log1pf op1=3ef21245 result=3ec63a26.012 errno=0
+func=log1pf op1=3ef217fd result=3ec63e08.048 errno=0
+func=log1pf op1=3ef2186a result=3ec63e52.063 errno=0
diff --git a/pl/math/test/testcases/directed/log2.tst b/pl/math/test/testcases/directed/log2.tst
new file mode 100644
index 0000000..5d1eb9b
--- /dev/null
+++ b/pl/math/test/testcases/directed/log2.tst
@@ -0,0 +1,21 @@
+; Directed test cases for log2
+;
+; Copyright (c) 2018-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
+func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
+func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
+func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
diff --git a/pl/math/test/testcases/directed/log2f.tst b/pl/math/test/testcases/directed/log2f.tst
new file mode 100644
index 0000000..4e08110
--- /dev/null
+++ b/pl/math/test/testcases/directed/log2f.tst
@@ -0,0 +1,27 @@
+; log2f.tst - Directed test cases for log2f
+;
+; Copyright (c) 2017-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log2f op1=7fc00001 result=7fc00001 errno=0
+func=log2f op1=ffc00001 result=7fc00001 errno=0
+func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
+func=log2f op1=7f800000 result=7f800000 errno=0
+func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=log2f op1=3f800000 result=00000000 errno=0
+func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
+
+func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
+func=log2f op1=3f604189 result=be4394c8.395 error=0
+func=log2f op1=3f278034 result=bf1caa73.88e error=0
+func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
+func=log2f op1=3e61259a result=c00bdb95.650 error=0
+func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
+func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
+func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
+func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
+func=log2f op1=40070838 result=3f89e055.a0a error=0
diff --git a/pl/math/test/testcases/directed/sinh.tst b/pl/math/test/testcases/directed/sinh.tst
new file mode 100644
index 0000000..d6a3da8
--- /dev/null
+++ b/pl/math/test/testcases/directed/sinh.tst
@@ -0,0 +1,21 @@
+; sinh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/sinhf.tst b/pl/math/test/testcases/directed/sinhf.tst
new file mode 100644
index 0000000..5f7bd1b
--- /dev/null
+++ b/pl/math/test/testcases/directed/sinhf.tst
@@ -0,0 +1,21 @@
+; sinhf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinhf op1=7fc00001 result=7fc00001 errno=0
+func=sinhf op1=ffc00001 result=7fc00001 errno=0
+func=sinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=sinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=sinhf op1=7f800000 result=7f800000 errno=0
+func=sinhf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=sinhf op1=ff800000 result=ff800000 errno=0
+func=sinhf op1=ff7fffff result=ff800000 errno=ERANGE status=ox
+func=sinhf op1=00000000 result=00000000 errno=0
+func=sinhf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=sinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=sinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/tanf.tst b/pl/math/test/testcases/directed/tanf.tst
new file mode 100644
index 0000000..3161f70
--- /dev/null
+++ b/pl/math/test/testcases/directed/tanf.tst
@@ -0,0 +1,25 @@
+; tanf.tst
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanf op1=7fc00001 result=7fc00001 errno=0
+func=tanf op1=ffc00001 result=7fc00001 errno=0
+func=tanf op1=7f800001 result=7fc00001 errno=0 status=i
+func=tanf op1=ff800001 result=7fc00001 errno=0 status=i
+func=tanf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=tanf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=tanf op1=00000000 result=00000000 errno=0
+func=tanf op1=80000000 result=80000000 errno=0
+; SDCOMP-26094: check tanf in the cases for which the range reducer
+; returns values furthest beyond its nominal upper bound of pi/4.
+func=tanf op1=46427f1b result=3f80396d.599 error=0
+func=tanf op1=4647e568 result=3f8039a6.c9f error=0
+func=tanf op1=46428bac result=3f803a03.148 error=0
+func=tanf op1=4647f1f9 result=3f803a3c.852 error=0
+func=tanf op1=4647fe8a result=3f803ad2.410 error=0
+func=tanf op1=45d8d7f1 result=bf800669.901 error=0
+func=tanf op1=45d371a4 result=bf800686.3cd error=0
+func=tanf op1=45ce0b57 result=bf8006a2.e9a error=0
+func=tanf op1=45d35882 result=bf80071b.bc4 error=0
+func=tanf op1=45cdf235 result=bf800738.693 error=0
diff --git a/pl/math/test/testcases/directed/tanh.tst b/pl/math/test/testcases/directed/tanh.tst
new file mode 100644
index 0000000..78776e6
--- /dev/null
+++ b/pl/math/test/testcases/directed/tanh.tst
@@ -0,0 +1,18 @@
+; tanh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=tanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=tanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=tanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=tanh op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
+func=tanh op1=fff00000.00000000 result=bff00000.00000000 errno=0
+func=tanh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=tanh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=tanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=tanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/directed/tanhf.tst b/pl/math/test/testcases/directed/tanhf.tst
new file mode 100644
index 0000000..603e310
--- /dev/null
+++ b/pl/math/test/testcases/directed/tanhf.tst
@@ -0,0 +1,20 @@
+; tanhf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanhf op1=7fc00001 result=7fc00001 errno=0
+func=tanhf op1=ffc00001 result=7fc00001 errno=0
+func=tanhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=tanhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=tanhf op1=7f800000 result=3f800000 errno=0
+func=tanhf op1=ff800000 result=bf800000 errno=0
+func=tanhf op1=00000000 result=00000000 errno=0
+func=tanhf op1=80000000 result=80000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+; func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+; func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
+func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/testcases/random/double.tst b/pl/math/test/testcases/random/double.tst
new file mode 100644
index 0000000..d83283e
--- /dev/null
+++ b/pl/math/test/testcases/random/double.tst
@@ -0,0 +1,6 @@
+!! double.tst - Random test case specification for DP functions
+!!
+!! Copyright (c) 1999-2023, Arm Limited.
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+test log10 10000
diff --git a/pl/math/test/testcases/random/float.tst b/pl/math/test/testcases/random/float.tst
new file mode 100644
index 0000000..fa77efe
--- /dev/null
+++ b/pl/math/test/testcases/random/float.tst
@@ -0,0 +1,8 @@
+!! float.tst - Random test case specification for SP functions
+!!
+!! Copyright (c) 2022-2023, Arm Limited.
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+test erff 10000
+test log10f 10000
+test tanf 10000
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
new file mode 100644
index 0000000..5e3133e
--- /dev/null
+++ b/pl/math/test/ulp_funcs.h
@@ -0,0 +1,66 @@
+/*
+ * Function entries for ulp.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Map each _ZV* entry onto the SF/SD, VF/VD and ZVN* entry macros that
+   apply to the current target; the set shrinks from branch to branch.  */
+#ifdef __vpcs
+
+#define _ZVF1(f) SF1 (f) VF1 (f) ZVNF1 (f)
+#define _ZVD1(f) SD1 (f) VD1 (f) ZVND1 (f)
+#define _ZVF2(f) SF2 (f) VF2 (f) ZVNF2 (f)
+#define _ZVD2(f) SD2 (f) VD2 (f) ZVND2 (f)
+
+/* The predefined macro is __aarch64__ (trailing underscores), matching the
+   equivalent chain in ulp_wrappers.h.  */
+#elif __aarch64__
+
+#define _ZVF1(f) SF1 (f) VF1 (f)
+#define _ZVD1(f) SD1 (f) VD1 (f)
+#define _ZVF2(f) SF2 (f) VF2 (f)
+#define _ZVD2(f) SD2 (f) VD2 (f)
+
+#elif WANT_VMATH
+
+#define _ZVF1(f) SF1 (f)
+#define _ZVD1(f) SD1 (f)
+#define _ZVF2(f) SF2 (f)
+#define _ZVD2(f) SD2 (f)
+
+#else
+
+#define _ZVF1(f)
+#define _ZVD1(f)
+#define _ZVF2(f)
+#define _ZVD2(f)
+
+#endif
+
+#if WANT_SVE_MATH
+
+#define _ZSVF1(f) SVF1 (f) ZSVF1 (f)
+#define _ZSVF2(f) SVF2 (f) ZSVF2 (f)
+#define _ZSVD1(f) SVD1 (f) ZSVD1 (f)
+#define _ZSVD2(f) SVD2 (f) ZSVD2 (f)
+
+#else
+
+#define _ZSVF1(f)
+#define _ZSVF2(f)
+#define _ZSVD1(f)
+#define _ZSVD2(f)
+
+#endif
+
+#define _ZSF1(f) F1 (f)
+#define _ZSF2(f) F2 (f)
+#define _ZSD1(f) D1 (f)
+#define _ZSD2(f) D2 (f)
+
+#include "ulp_funcs_gen.h"
+
+#if WANT_SVE_MATH
+F (__sv_powi, sv_powi, ref_powi, mpfr_powi, 2, 0, d2, 0)
+F (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
+F (__sv_powif, sv_powif, ref_powif, mpfr_powi, 2, 1, f2, 0)
+F (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
+#endif
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
new file mode 100644
index 0000000..b682e93
--- /dev/null
+++ b/pl/math/test/ulp_wrappers.h
@@ -0,0 +1,148 @@
+// clang-format off
+/*
+ * Function wrappers for ulp.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdbool.h>
+
+#if USE_MPFR
+/* Writes cos(x) into y and then overwrites it with sin(x); the value
+   returned is the ternary result of mpfr_sin.
+   NOTE(review): the discarded mpfr_cos call presumably mirrors a combined
+   sincos routine that evaluates both functions - confirm.  */
+static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
+ mpfr_cos(y, x, r);
+ return mpfr_sin(y, x, r);
+}
+/* Writes sin(x) into y and then overwrites it with cos(x); the value
+   returned is the ternary result of mpfr_cos.
+   NOTE(review): the discarded mpfr_sin call presumably mirrors a combined
+   sincos routine that evaluates both functions - confirm.  */
+static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
+ mpfr_sin(y, x, r);
+ return mpfr_cos(y, x, r);
+}
+/* Computes ret = x raised to trunc(y), rounded according to rnd.
+   Returns the ternary value reported by mpfr_pow.  */
+static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
+ mpfr_t y2;
+ mpfr_init(y2);
+ mpfr_trunc(y2, y);
+ int ternary = mpfr_pow(ret, x, y2, rnd);
+ /* y2 is a local temporary: release it so repeated calls do not leak.  */
+ mpfr_clear(y2);
+ return ternary;
+}
+#endif
+
+/* Our implementations of powi/powk are too imprecise to verify
+ against any established pow implementation. Instead we have the
+ following simple implementation, against which it is enough to
+ maintain bitwise reproducibility. Note the test framework expects
+ the reference impl to be of higher precision than the function
+ under test. For instance this means that the reference for
+ double-precision powi will be passed a long double, so to check
+ bitwise reproducibility we have to cast it back down to
+ double. This is fine since a round-trip to higher precision and
+ back down is correctly rounded. */
+#define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T) \
+ static DBL_T NAME (DBL_T in_val, DBL_T y) \
+ { \
+ INT_T n = (INT_T) round (y); \
+ FLT_T acc = 1.0; \
+ bool want_recip = n < 0; \
+ n = n < 0 ? -n : n; \
+ \
+ for (FLT_T c = in_val; n; c *= c, n >>= 1) \
+ { \
+ if (n & 0x1) \
+ { \
+ acc *= c; \
+ } \
+ } \
+ if (want_recip) \
+ { \
+ acc = 1.0 / acc; \
+ } \
+ return acc; \
+ }
+
+DECL_POW_INT_REF(ref_powif, double, float, int)
+DECL_POW_INT_REF(ref_powi, long double, double, int)
+
+#define VF1_WRAP(func) static float v_##func##f(float x) { return __v_##func##f(argf(x))[0]; }
+#define VF2_WRAP(func) static float v_##func##f(float x, float y) { return __v_##func##f(argf(x), argf(y))[0]; }
+#define VD1_WRAP(func) static double v_##func(double x) { return __v_##func(argd(x))[0]; }
+#define VD2_WRAP(func) static double v_##func(double x, double y) { return __v_##func(argd(x), argd(y))[0]; }
+
+#define VNF1_WRAP(func) static float vn_##func##f(float x) { return __vn_##func##f(argf(x))[0]; }
+#define VNF2_WRAP(func) static float vn_##func##f(float x, float y) { return __vn_##func##f(argf(x), argf(y))[0]; }
+#define VND1_WRAP(func) static double vn_##func(double x) { return __vn_##func(argd(x))[0]; }
+#define VND2_WRAP(func) static double vn_##func(double x, double y) { return __vn_##func(argd(x), argd(y))[0]; }
+
+#define ZVF1_WRAP(func) static float Z_##func##f(float x) { return _ZGVnN4v_##func##f(argf(x))[0]; }
+#define ZVF2_WRAP(func) static float Z_##func##f(float x, float y) { return _ZGVnN4vv_##func##f(argf(x), argf(y))[0]; }
+#define ZVD1_WRAP(func) static double Z_##func(double x) { return _ZGVnN2v_##func(argd(x))[0]; }
+#define ZVD2_WRAP(func) static double Z_##func(double x, double y) { return _ZGVnN2vv_##func(argd(x), argd(y))[0]; }
+
+#ifdef __vpcs
+
+#define ZVNF1_WRAP(func) VF1_WRAP(func) VNF1_WRAP(func) ZVF1_WRAP(func)
+#define ZVNF2_WRAP(func) VF2_WRAP(func) VNF2_WRAP(func) ZVF2_WRAP(func)
+#define ZVND1_WRAP(func) VD1_WRAP(func) VND1_WRAP(func) ZVD1_WRAP(func)
+#define ZVND2_WRAP(func) VD2_WRAP(func) VND2_WRAP(func) ZVD2_WRAP(func)
+
+#elif __aarch64__
+
+#define ZVNF1_WRAP(func) VF1_WRAP(func) VNF1_WRAP(func)
+#define ZVNF2_WRAP(func) VF2_WRAP(func) VNF2_WRAP(func)
+#define ZVND1_WRAP(func) VD1_WRAP(func) VND1_WRAP(func)
+#define ZVND2_WRAP(func) VD2_WRAP(func) VND2_WRAP(func)
+
+#elif WANT_VMATH
+
+#define ZVNF1_WRAP(func) VF1_WRAP(func)
+#define ZVNF2_WRAP(func) VF2_WRAP(func)
+#define ZVND1_WRAP(func) VD1_WRAP(func)
+#define ZVND2_WRAP(func) VD2_WRAP(func)
+
+#else
+
+#define ZVNF1_WRAP(func)
+#define ZVNF2_WRAP(func)
+#define ZVND1_WRAP(func)
+#define ZVND2_WRAP(func)
+
+#endif
+
+#define SVF1_WRAP(func) static float sv_##func##f(float x) { return svretf(__sv_##func##f_x(svargf(x), svptrue_b32())); }
+#define SVF2_WRAP(func) static float sv_##func##f(float x, float y) { return svretf(__sv_##func##f_x(svargf(x), svargf(y), svptrue_b32())); }
+#define SVD1_WRAP(func) static double sv_##func(double x) { return svretd(__sv_##func##_x(svargd(x), svptrue_b64())); }
+#define SVD2_WRAP(func) static double sv_##func(double x, double y) { return svretd(__sv_##func##_x(svargd(x), svargd(y), svptrue_b64())); }
+
+#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); }
+#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); }
+#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); }
+#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); }
+
+#if WANT_SVE_MATH
+
+#define ZSVNF1_WRAP(func) SVF1_WRAP(func) ZSVF1_WRAP(func)
+#define ZSVNF2_WRAP(func) SVF2_WRAP(func) ZSVF2_WRAP(func)
+#define ZSVND1_WRAP(func) SVD1_WRAP(func) ZSVD1_WRAP(func)
+#define ZSVND2_WRAP(func) SVD2_WRAP(func) ZSVD2_WRAP(func)
+
+#else
+
+#define ZSVNF1_WRAP(func)
+#define ZSVNF2_WRAP(func)
+#define ZSVND1_WRAP(func)
+#define ZSVND2_WRAP(func)
+
+#endif
+
+/* No wrappers for scalar routines, but PL_SIG will emit them. */
+#define ZSNF1_WRAP(func)
+#define ZSNF2_WRAP(func)
+#define ZSND1_WRAP(func)
+#define ZSND2_WRAP(func)
+
+#include "ulp_wrappers_gen.h"
+
+#if WANT_SVE_MATH
+static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_n_s32((int)round(y)), svptrue_b32())); }
+static float sv_powif(float x, float y) { return svretf(__sv_powif_x(svargf(x), svdup_n_s32((int)round(y)), svptrue_b32())); }
+static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_n_s64((long)round(y)), svptrue_b64())); }
+static double sv_powi(double x, double y) { return svretd(__sv_powi_x(svargd(x), svdup_n_s64((long)round(y)), svptrue_b64())); }
+#endif
+// clang-format on
diff --git a/pl/math/tools/asinh.sollya b/pl/math/tools/asinh.sollya
new file mode 100644
index 0000000..663ee92
--- /dev/null
+++ b/pl/math/tools/asinh.sollya
@@ -0,0 +1,28 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so
+// we use 2^-6 as the lower bound for coeff generation, which yields sufficiently
+// accurate results in [2^-26, 2^-6].
+a = 0x1p-6;
+b = 1.0;
+deg = 17; // degree of P (was undefined); the vector routine evaluates P with ESTRIN_17
+f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2);
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ i;
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/pl/math/tools/asinhf.sollya b/pl/math/tools/asinhf.sollya
new file mode 100644
index 0000000..ab115b5
--- /dev/null
+++ b/pl/math/tools/asinhf.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9;
+
+a = 0x1.0p-12;
+b = 1.0;
+
+f = proc(y) { // target function, evaluated at the procedure's own argument
+ return asinh(y);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do coeff(poly,i);
diff --git a/pl/math/tools/atan.sollya b/pl/math/tools/atan.sollya
new file mode 100644
index 0000000..ad4f33b
--- /dev/null
+++ b/pl/math/tools/atan.sollya
@@ -0,0 +1,23 @@
+// polynomial for approximating atan(x) and atan2(y, x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// atan is odd, so approximate with an odd polynomial:
+// x + ax^3 + bx^5 + cx^7 + ...
+// We generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+// Assemble monomials
+deg = 20;
+mons = [|1,...,deg|];
+for i from 0 to deg-1 do mons[i] = mons[i] * 2 + 1;
+
+a = 0x1.0p-1022;
+b = 1;
+
+poly = fpminimax(atan(x)-x, mons, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg-1 do coeff(poly,mons[i]);
diff --git a/pl/math/tools/atanf.sollya b/pl/math/tools/atanf.sollya
new file mode 100644
index 0000000..ed88d0b
--- /dev/null
+++ b/pl/math/tools/atanf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating atanf(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Generate list of monomials:
+// Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ...
+// So generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+deg = 7;
+
+a = 1.1754943508222875e-38;
+b = 1;
+
+poly = fpminimax((atan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/pl/math/tools/cbrt.sollya b/pl/math/tools/cbrt.sollya
new file mode 100644
index 0000000..1d43dc7
--- /dev/null
+++ b/pl/math/tools/cbrt.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), D, RN);
diff --git a/pl/math/tools/cbrtf.sollya b/pl/math/tools/cbrtf.sollya
new file mode 100644
index 0000000..4e0cc69
--- /dev/null
+++ b/pl/math/tools/cbrtf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), SG, RN);
diff --git a/pl/math/tools/erfc.sollya b/pl/math/tools/erfc.sollya
new file mode 100644
index 0000000..8c40b4b
--- /dev/null
+++ b/pl/math/tools/erfc.sollya
@@ -0,0 +1,23 @@
+// polynomial for approximating erfc(x)*exp(x*x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12; // poly degree
+
+// interval bounds
+a = 0x1.60dfc14636e2ap0;
+b = 0x1.d413cccfe779ap0;
+
+f = proc(y) {
+ t = y + a;
+ return erfc(t) * exp(t*t);
+};
+
+poly = remez(f(x), deg, [0;b-a], 1, 1e-16);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [0;b-a], 30)); // poly approximates the shifted f on [0;b-a]
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), 52, RN);
diff --git a/pl/math/tools/erfcf.sollya b/pl/math/tools/erfcf.sollya
new file mode 100644
index 0000000..69c6836
--- /dev/null
+++ b/pl/math/tools/erfcf.sollya
@@ -0,0 +1,31 @@
+// polynomial for approximating erfc(x)*exp(x*x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 15; // poly degree
+
+// interval bounds
+a = 0x1.0p-26;
+b = 2;
+
+f = proc(y) {
+ return erfc(y) * exp(y*y);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+ print(i);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/pl/math/tools/expm1.sollya b/pl/math/tools/expm1.sollya
new file mode 100644
index 0000000..7b6f324
--- /dev/null
+++ b/pl/math/tools/expm1.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), D, RN);
diff --git a/pl/math/tools/expm1f.sollya b/pl/math/tools/expm1f.sollya
new file mode 100644
index 0000000..efdf1bd
--- /dev/null
+++ b/pl/math/tools/expm1f.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 5;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/pl/math/tools/log10.sollya b/pl/math/tools/log10.sollya
new file mode 100644
index 0000000..85d1d15
--- /dev/null
+++ b/pl/math/tools/log10.sollya
@@ -0,0 +1,44 @@
+// polynomial for approximating log10(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+// |log10(1+x)| > 0x1p-5 outside the interval
+a = -0x1.p-5;
+b = 0x1.p-5;
+
+ln10 = evaluate(log(10),0);
+invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits
+invln10lo = double(1/ln10 - invln10hi);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln10hi + invln10lo;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10hi:", invln10hi);
+print("invln10lo:", invln10lo);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/pl/math/tools/log10f.sollya b/pl/math/tools/log10f.sollya
new file mode 100644
index 0000000..94bf32f
--- /dev/null
+++ b/pl/math/tools/log10f.sollya
@@ -0,0 +1,37 @@
+// polynomial for approximating log10f(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Computation of log10f(1+x) will be carried out in double precision
+
+deg = 4; // poly degree
+// [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7
+a = -0.04375;
+b = 0.04375;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/pl/math/tools/log1p.sollya b/pl/math/tools/log1p.sollya
new file mode 100644
index 0000000..598a36a
--- /dev/null
+++ b/pl/math/tools/log1p.sollya
@@ -0,0 +1,30 @@
+// polynomial for approximating log(1+x) in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 20;
+
+a = sqrt(2)/2-1;
+b = sqrt(2)-1;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+print("coeffs:");
+display = hexadecimal;
+for i from 2 to deg do coeff(poly,i);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
diff --git a/pl/math/tools/log1pf.sollya b/pl/math/tools/log1pf.sollya
new file mode 100644
index 0000000..cc1db10
--- /dev/null
+++ b/pl/math/tools/log1pf.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating log(1+x) in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 10;
+
+a = -0.25;
+b = 0.5;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/pl/math/tools/tan.sollya b/pl/math/tools/tan.sollya
new file mode 100644
index 0000000..bb0bb28
--- /dev/null
+++ b/pl/math/tools/tan.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating double precision tan(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 8;
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 8;
+
+display = hexadecimal;
+
+f = (tan(sqrt(x))-sqrt(x))/x^(3/2);
+poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]);
+
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/pl/math/tools/tanf.sollya b/pl/math/tools/tanf.sollya
new file mode 100644
index 0000000..f4b49b4
--- /dev/null
+++ b/pl/math/tools/tanf.sollya
@@ -0,0 +1,78 @@
+// polynomial for approximating single precision tan(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+dtype = single;
+
+mthd = 0; // approximate tan
+deg = 5; // poly degree
+
+// // Uncomment for cotan
+// mthd = 1; // approximate cotan
+// deg = 3; // poly degree
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 4;
+
+print("Print some useful constants");
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+
+print("pi/4");
+pi/4;
+
+// Setup precisions (display and computation)
+display = decimal!;
+prec=128!;
+save_prec=prec;
+
+//
+// Select function to approximate with Sollya
+//
+if(mthd==0) then {
+ s = "x + x^3 * P(x^2)";
+ g = tan(x);
+ F = proc(P) { return x + x^3 * P(x^2); };
+ f = (g(sqrt(x))-sqrt(x))/(x*sqrt(x));
+ init_poly = 0;
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
+}
+else if (mthd==1) then {
+ s = "1/x + x * P(x^2)";
+ g = 1 / tan(x);
+ F = proc(P) { return 1/x + x * P(x^2); };
+ f = (g(sqrt(x))-1/sqrt(x))/(sqrt(x));
+ init_poly = 0;
+ deg_init_poly = -1; // a value such that we actually start by building constant coefficient
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ // Fpminimax used to minimise absolute error
+ approx_fpminimax = proc(func, poly, d) {
+ return fpminimax(func - poly / x^-(deg-d), 0, [|dtype|], [a;b], absolute, floating);
+ };
+ // Optimise all coefficients at once
+ poly = fpminimax(f, [|0,...,deg|], [|dtype ...|], [a;b], absolute, floating);
+};
+
+
+//
+// Display coefficients in Sollya
+//
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+print("_coeffs :_ hex");
+for i from 0 to deg do coeff(poly, i);
+
+// Compute errors
+display = hexadecimal!;
+d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
+d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
+print("dirty rel error:", d_rel_err);
+print("dirty abs error:", d_abs_err);
+print("in [",a,b,"]");
diff --git a/pl/math/tools/v_erf.sollya b/pl/math/tools/v_erf.sollya
new file mode 100644
index 0000000..394ba37
--- /dev/null
+++ b/pl/math/tools/v_erf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating erf(x).
+// To generate coefficients for interval i (0 to 47) do:
+// $ sollya v_erf.sollya $i
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+scale = 1/8;
+deg = 9;
+
+itv = parse(__argv[0]);
+if (itv == 0) then { a = 0x1p-1022; }
+else { a = itv * scale; };
+
+prec=256;
+
+poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]);
+
+display = hexadecimal;
+for i from 0 to deg do coeff(poly, i); \ No newline at end of file
diff --git a/pl/math/tools/v_erfc.sollya b/pl/math/tools/v_erfc.sollya
new file mode 100644
index 0000000..3b03ba0
--- /dev/null
+++ b/pl/math/tools/v_erfc.sollya
@@ -0,0 +1,46 @@
+// polynomial for approximating erfc(x)*exp(x*x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12; // poly degree
+
+itv = parse(__argv[0]);
+
+bounds = [|3.725290298461914e-9,
+ 0.18920711500272103,
+ 0.41421356237309515,
+ 0.681792830507429,
+ 1,
+ 1.378414230005442,
+ 1.8284271247461903,
+ 2.363585661014858,
+ 3,
+ 3.756828460010884,
+ 4.656854249492381,
+ 5.727171322029716,
+ 7,
+ 8.513656920021768,
+ 10.313708498984761,
+ 12.454342644059432,
+ 15,
+ 18.027313840043536,
+ 21.627416997969522,
+ 25.908685288118864,
+ 31|];
+
+a = bounds[itv];
+b = bounds[itv + 1];
+
+f = proc(y) {
+ t = y + a;
+ return erfc(t) * exp(t*t);
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [0;b-a], 30)); // poly approximates the shifted f on [0;b-a]
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly, i);
diff --git a/pl/math/tools/v_log10.sollya b/pl/math/tools/v_log10.sollya
new file mode 100644
index 0000000..e2df436
--- /dev/null
+++ b/pl/math/tools/v_log10.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log10(x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+a = -0x1.fc1p-9;
+b = 0x1.009p-8;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+// scale coefficients by 1/ln(10): poly now approximates log10(1+x)/x = f(x)/ln10
+ln10 = evaluate(log(10),0);
+poly = poly/ln10;
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/(f(x)/ln10), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/pl/math/tools/v_log10f.sollya b/pl/math/tools/v_log10f.sollya
new file mode 100644
index 0000000..396d5a9
--- /dev/null
+++ b/pl/math/tools/v_log10f.sollya
@@ -0,0 +1,45 @@
+// polynomial for approximating v_log10f(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+// |log10(1+x)| > 0x1p-4 outside the interval
+a = -1/3;
+b = 1/3;
+
+display = hexadecimal;
+print("log10(2) = ", single(log10(2)));
+
+ln10 = evaluate(log(10),0);
+invln10 = single(1/ln10);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal single prec coeffs
+poly = invln10;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10:", invln10);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do single(coeff(poly,i));
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/pl/math/tools/v_log2f.sollya b/pl/math/tools/v_log2f.sollya
new file mode 100644
index 0000000..99e050c
--- /dev/null
+++ b/pl/math/tools/v_log2f.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log2f(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+a = -1/3;
+b = 1/3;
+
+ln2 = evaluate(log(2),0);
+invln2 = single(1/ln2);
+
+// find log2(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log2(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log2(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f * invln2;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal single prec coeffs
+poly = invln2;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/pl/math/v_acosh_3u5.c b/pl/math/v_acosh_3u5.c
new file mode 100644
index 0000000..22f69d7
--- /dev/null
+++ b/pl/math/v_acosh_3u5.c
@@ -0,0 +1,51 @@
+/*
+ * Double-precision vector acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define WANT_V_LOG1P_K0_SHORTCUT 1
+#include "v_log1p_inline.h"
+
+#define BigBoundTop 0x5fe /* top12 (asuint64 (0x1p511)). */
+
+#if V_SUPPORTED
+
+static NOINLINE VPCS_ATTR v_f64_t
+special_case (v_f64_t x)
+{
+ return v_call_f64 (acosh, x, x, v_u64 (-1));
+}
+
+/* Vector approximation for double-precision acosh, based on log1p.
+ The largest observed error is 3.02 ULP in the region where the
+ argument to log1p falls in the k=0 interval, i.e. x close to 1:
+ __v_acosh(0x1.00798aaf80739p+0) got 0x1.f2d6d823bc9dfp-5
+ want 0x1.f2d6d823bc9e2p-5. */
+VPCS_ATTR v_f64_t V_NAME (acosh) (v_f64_t x)
+{
+ v_u64_t itop = v_as_u64_f64 (x) >> 52;
+ v_u64_t special = v_cond_u64 ((itop - OneTop) >= (BigBoundTop - OneTop));
+
+ /* Fall back to scalar routine for all lanes if any of them are special. */
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x);
+
+ v_f64_t xm1 = x - 1;
+ v_f64_t u = xm1 * (x + 1);
+ return log1p_inline (xm1 + v_sqrt_f64 (u));
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (V_NAME (acosh), 2.53)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (acosh))
+PL_TEST_INTERVAL (V_NAME (acosh), 1, 0x1p511, 90000)
+PL_TEST_INTERVAL (V_NAME (acosh), 0x1p511, inf, 10000)
+PL_TEST_INTERVAL (V_NAME (acosh), 0, 1, 1000)
+PL_TEST_INTERVAL (V_NAME (acosh), -0, -inf, 10000)
+#endif
diff --git a/pl/math/v_acoshf_3u1.c b/pl/math/v_acoshf_3u1.c
new file mode 100644
index 0000000..2b5aff5
--- /dev/null
+++ b/pl/math/v_acoshf_3u1.c
@@ -0,0 +1,68 @@
+/*
+ * Single-precision vector acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define SignMask 0x80000000
+#define One 0x3f800000
+#define SquareLim 0x5f800000 /* asuint(0x1p64). */
+
+#if V_SUPPORTED
+
+#include "v_log1pf_inline.h"
+
+static NOINLINE VPCS_ATTR v_f32_t
+special_case (v_f32_t x, v_f32_t y, v_u32_t special)
+{
+ return v_call_f32 (acoshf, x, y, special);
+}
+
+/* Vector approximation for single-precision acosh, based on log1p. Maximum
+ error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
+ is 2.78 ULP:
+ __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
+ want 0x1.ef9ea2p-3.
+ With exceptions disabled, we can compute u with a shorter dependency chain,
+ which gives maximum error of 3.07 ULP:
+ __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
+ want 0x1.fbc7f4p-4. */
+
+VPCS_ATTR v_f32_t V_NAME (acoshf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t special = v_cond_u32 ((ix - One) >= (SquareLim - One));
+
+#if WANT_SIMD_EXCEPT
+ /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
+ only xm1 to calculate u, as operating on x will trigger invalid for NaN. */
+ v_f32_t xm1 = v_sel_f32 (special, v_f32 (1), x - 1);
+ v_f32_t u = v_fma_f32 (xm1, xm1, 2 * xm1);
+#else
+ v_f32_t xm1 = x - 1;
+ v_f32_t u = xm1 * (x + 1.0f);
+#endif
+ v_f32_t y = log1pf_inline (xm1 + v_sqrt_f32 (u));
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, acosh, 1.0, 10.0)
+#if WANT_SIMD_EXCEPT
+PL_TEST_ULP (V_NAME (acoshf), 2.29)
+#else
+PL_TEST_ULP (V_NAME (acoshf), 2.58)
+#endif
+PL_TEST_EXPECT_FENV (V_NAME (acoshf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (acoshf), 0, 1, 500)
+PL_TEST_INTERVAL (V_NAME (acoshf), 1, SquareLim, 100000)
+PL_TEST_INTERVAL (V_NAME (acoshf), SquareLim, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (acoshf), -0, -inf, 1000)
+#endif
diff --git a/pl/math/v_asinh_3u5.c b/pl/math/v_asinh_3u5.c
new file mode 100644
index 0000000..fd329b6
--- /dev/null
+++ b/pl/math/v_asinh_3u5.c
@@ -0,0 +1,175 @@
+/*
+ * Double-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define OneTop 0x3ff /* top12(asuint64(1.0)). */
+#define HugeBound 0x5fe /* top12(asuint64(0x1p511)). */
+#define TinyBound 0x3e5 /* top12(asuint64(0x1p-26)). */
+#define AbsMask v_u64 (0x7fffffffffffffff)
+#define C(i) v_f64 (__asinh_data.poly[i])
+
+/* Constants & data for log. */
+#define OFF 0x3fe6000000000000
+#define Ln2 v_f64 (0x1.62e42fefa39efp-1)
+#define A(i) v_f64 (__sv_log_data.poly[i])
+#define T(i) __log_data.tab[i]
+#define N (1 << LOG_TABLE_BITS)
+
+static NOINLINE v_f64_t
+special_case (v_f64_t x, v_f64_t y, v_u64_t special)
+{
+ return v_call_f64 (asinh, x, y, special);
+}
+
+struct entry
+{
+ v_f64_t invc;
+ v_f64_t logc;
+};
+
+static inline struct entry
+lookup (v_u64_t i)
+{
+ struct entry e;
+#ifdef SCALAR
+ e.invc = T (i).invc;
+ e.logc = T (i).logc;
+#else
+ e.invc[0] = T (i[0]).invc;
+ e.logc[0] = T (i[0]).logc;
+ e.invc[1] = T (i[1]).invc;
+ e.logc[1] = T (i[1]).logc;
+#endif
+ return e;
+}
+
+static inline v_f64_t
+log_inline (v_f64_t x)
+{
+ /* Double-precision vector log, copied from math/v_log.c with some cosmetic
+ modification and special-cases removed. See that file for details of the
+ algorithm used. */
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t tmp = ix - OFF;
+ v_u64_t i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+ v_s64_t k = v_as_s64_u64 (tmp) >> 52;
+ v_u64_t iz = ix - (tmp & 0xfffULL << 52);
+ v_f64_t z = v_as_f64_u64 (iz);
+ struct entry e = lookup (i);
+ v_f64_t r = v_fma_f64 (z, e.invc, v_f64 (-1.0));
+ v_f64_t kd = v_to_f64_s64 (k);
+ v_f64_t hi = v_fma_f64 (kd, Ln2, e.logc + r);
+ v_f64_t r2 = r * r;
+ v_f64_t y = v_fma_f64 (A (3), r, A (2));
+ v_f64_t p = v_fma_f64 (A (1), r, A (0));
+ y = v_fma_f64 (A (4), r2, y);
+ y = v_fma_f64 (y, r2, p);
+ y = v_fma_f64 (y, r2, hi);
+ return y;
+}
+
+/* Double-precision implementation of vector asinh(x).
+ asinh is very sensitive around 1, so it is impractical to devise a single
+ low-cost algorithm which is sufficiently accurate on a wide range of input.
+ Instead we use two different algorithms:
+ asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1
+ = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
+ where log(x) is an optimized log approximation, and P(x) is a polynomial
+ shared with the scalar routine. The greatest observed error is 3.29 ULP, in
+ |x| >= 1:
+ __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
+ want 0x1.ffffcfd0e2352p-1. */
+VPCS_ATTR v_f64_t V_NAME (asinh) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t iax = ix & AbsMask;
+ v_f64_t ax = v_as_f64_u64 (iax);
+ v_u64_t top12 = iax >> 52;
+
+ v_u64_t gt1 = v_cond_u64 (top12 >= OneTop);
+ v_u64_t special = v_cond_u64 (top12 >= HugeBound);
+
+#if WANT_SIMD_EXCEPT
+ v_u64_t tiny = v_cond_u64 (top12 < TinyBound);
+ special |= tiny;
+#endif
+
+ /* Option 1: |x| >= 1.
+ Compute asinh(x) according to asinh(x) = log(|x| + sqrt(x^2 + 1)).
+ If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
+ overflow, by setting special lanes to 1. These will be fixed later. */
+ v_f64_t option_1 = v_f64 (0);
+ if (likely (v_any_u64 (gt1)))
+ {
+#if WANT_SIMD_EXCEPT
+ v_f64_t xm = v_sel_f64 (special, v_f64 (1), ax);
+#else
+ v_f64_t xm = ax;
+#endif
+ option_1 = log_inline (xm + v_sqrt_f64 (xm * xm + 1));
+ }
+
+ /* Option 2: |x| < 1.
+ Compute asinh(x) using a polynomial.
+ If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
+ overflow, and tiny lanes, which will underflow, by setting them to 0. They
+ will be fixed later, either by selecting x or falling back to the scalar
+ special-case. The largest observed error in this region is 1.47 ULPs:
+ __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1. */
+ v_f64_t option_2 = v_f64 (0);
+ if (likely (v_any_u64 (~gt1)))
+ {
+#if WANT_SIMD_EXCEPT
+ ax = v_sel_f64 (tiny | gt1, v_f64 (0), ax);
+#endif
+ v_f64_t x2 = ax * ax;
+ v_f64_t z2 = x2 * x2;
+ v_f64_t z4 = z2 * z2;
+ v_f64_t z8 = z4 * z4;
+ v_f64_t p = ESTRIN_17 (x2, z2, z4, z8, z8 * z8, C);
+ option_2 = v_fma_f64 (p, x2 * ax, ax);
+#if WANT_SIMD_EXCEPT
+ option_2 = v_sel_f64 (tiny, x, option_2);
+#endif
+ }
+
+ /* Choose the right option for each lane. */
+ v_f64_t y = v_sel_f64 (gt1, option_1, option_2);
+ /* Copy sign. */
+ y = v_as_f64_u64 (v_bsl_u64 (AbsMask, v_as_u64_f64 (y), ix));
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (asinh), 2.80)
+PL_TEST_EXPECT_FENV (V_NAME (asinh), WANT_SIMD_EXCEPT)
+/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
+ Ensures the v_sel is choosing the right option in all cases. */
+#define V_ASINH_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 0.5) \
+ PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 2) \
+ PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 0x1p600)
+V_ASINH_INTERVAL (0, 0x1p-26, 50000)
+V_ASINH_INTERVAL (0x1p-26, 1, 50000)
+V_ASINH_INTERVAL (1, 0x1p511, 50000)
+V_ASINH_INTERVAL (0x1p511, inf, 40000)
+V_ASINH_INTERVAL (-0, -0x1p-26, 50000)
+V_ASINH_INTERVAL (-0x1p-26, -1, 50000)
+V_ASINH_INTERVAL (-1, -0x1p511, 50000)
+V_ASINH_INTERVAL (-0x1p511, -inf, 40000)
+#endif
diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c
new file mode 100644
index 0000000..9d8c8a9
--- /dev/null
+++ b/pl/math/v_asinhf_2u7.c
@@ -0,0 +1,70 @@
+/*
+ * Single-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define SignMask v_u32 (0x80000000)
+#define One v_f32 (1.0f)
+#define BigBound v_u32 (0x5f800000) /* asuint(0x1p64). */
+#define TinyBound v_u32 (0x30800000) /* asuint(0x1p-30). */
+
+#include "v_log1pf_inline.h"
+
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t special)
+{
+ return v_call_f32 (asinhf, x, y, special);
+}
+
+/* Single-precision implementation of vector asinh(x), using vector log1p.
+ Worst-case error is 2.66 ULP, at roughly +/-0.25:
+ __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */
+VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & ~SignMask;
+ v_u32_t sign = ix & SignMask;
+ v_f32_t ax = v_as_f32_u32 (iax);
+ v_u32_t special = v_cond_u32 (iax >= BigBound);
+
+#if WANT_SIMD_EXCEPT
+ /* Sidestep tiny and large values to avoid inadvertently triggering
+ under/overflow. */
+ special |= v_cond_u32 (iax < TinyBound);
+ if (unlikely (v_any_u32 (special)))
+ ax = v_sel_f32 (special, One, ax);
+#endif
+
+ /* asinh(x) = log(x + sqrt(x * x + 1)).
+ For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
+ v_f32_t d = One + v_sqrt_f32 (ax * ax + One);
+ v_f32_t y = log1pf_inline (ax + ax * ax / d);
+ y = v_as_f32_u32 (sign | v_as_u32_f32 (y));
+
+ if (unlikely (v_any_u32 (special)))
+ return specialcase (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (asinhf), 2.17)
+PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0, 0x1p-12, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p-12, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 1.0, 0x1p11, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p11, inf, 40000)
+PL_TEST_INTERVAL (V_NAME (asinhf), 0, -0x1p-12, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p-12, -1.0, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -1.0, -0x1p11, 20000)
+PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p11, -inf, 20000)
+#endif
diff --git a/pl/math/v_atan2_3u.c b/pl/math/v_atan2_3u.c
new file mode 100644
index 0000000..6327fea
--- /dev/null
+++ b/pl/math/v_atan2_3u.c
@@ -0,0 +1,90 @@
+/*
+ * Double-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "atan_common.h"
+
+#define PiOver2 v_f64 (0x1.921fb54442d18p+0)
+#define SignMask v_u64 (0x8000000000000000)
+
+/* Special cases, i.e. 0, infinity or NaN in x or y: fall back to scalar atan2. */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t y, v_f64_t x, v_f64_t ret, v_u64_t cmp)
+{ /* NOTE(review): v_call2_f64 presumably keeps ret in lanes not set in cmp. */
+ return v_call2_f64 (atan2, y, x, ret, cmp);
+}
+
+/* Flags lanes whose bits encode +/-0, +/-infinity or nan (via v_cond_u64). */
+static inline v_u64_t
+zeroinfnan (v_u64_t i)
+{ /* 2 * i shifts the sign bit out; - 1 wraps zero to the maximum value. */
+ return v_cond_u64 (2 * i - 1 >= v_u64 (2 * asuint64 (INFINITY) - 1));
+}
+
+/* Fast implementation of vector atan2 (y, x).
+ Maximum observed error is 2.8 ulps:
+ v_atan2(0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
+ got 0x1.92d628ab678ccp-1
+ want 0x1.92d628ab678cfp-1. */
+VPCS_ATTR
+v_f64_t V_NAME (atan2) (v_f64_t y, v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t iy = v_as_u64_f64 (y);
+
+ v_u64_t special_cases = zeroinfnan (ix) | zeroinfnan (iy); /* Either input. */
+
+ v_u64_t sign_x = ix & SignMask;
+ v_u64_t sign_y = iy & SignMask;
+ v_u64_t sign_xy = sign_x ^ sign_y; /* Set when x and y differ in sign. */
+
+ v_f64_t ax = v_abs_f64 (x);
+ v_f64_t ay = v_abs_f64 (y);
+
+ v_u64_t pred_xlt0 = x < 0.0;
+ v_u64_t pred_aygtax = ay > ax;
+
+ /* Set up z for the polynomial: ay / ax, or -ax / ay when ay > ax. */
+ v_f64_t n = v_sel_f64 (pred_aygtax, -ax, ay);
+ v_f64_t d = v_sel_f64 (pred_aygtax, ay, ax);
+ v_f64_t z = v_div_f64 (n, d);
+
+ /* Shift is -2 if x < 0 (else 0), plus 1 if ay > ax, in units of pi/2. */
+ v_f64_t shift = v_sel_f64 (pred_xlt0, v_f64 (-2.0), v_f64 (0.0));
+ shift = v_sel_f64 (pred_aygtax, shift + 1.0, shift);
+ shift *= PiOver2;
+
+ v_f64_t ret = eval_poly (z, z, shift);
+
+ /* Account for the sign of x and y: flip the result where they differ. */
+ ret = v_as_f64_u64 (v_as_u64_f64 (ret) ^ sign_xy);
+
+ if (unlikely (v_any_u64 (special_cases)))
+ {
+ return specialcase (y, x, ret, special_cases);
+ }
+
+ return ret;
+}
+VPCS_ALIAS
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (V, D, 2, atan2)
+// TODO tighten this once __v_atan2 is fixed
+PL_TEST_ULP (V_NAME (atan2), 2.9)
+PL_TEST_INTERVAL (V_NAME (atan2), -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (V_NAME (atan2), -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2), 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c
new file mode 100644
index 0000000..5d1e6ca
--- /dev/null
+++ b/pl/math/v_atan2f_3u.c
@@ -0,0 +1,89 @@
+/*
+ * Single-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "atanf_common.h"
+
+/* Useful constants. */
+#define PiOver2 v_f32 (0x1.921fb6p+0f)
+#define SignMask v_u32 (0x80000000)
+
+/* Special cases, i.e. 0, infinity or nan in x or y: fall back to scalar atan2f. */
+VPCS_ATTR
+NOINLINE static v_f32_t
+specialcase (v_f32_t y, v_f32_t x, v_f32_t ret, v_u32_t cmp)
+{ /* NOTE(review): v_call2_f32 presumably keeps ret in lanes not set in cmp. */
+ return v_call2_f32 (atan2f, y, x, ret, cmp);
+}
+
+/* Flags lanes whose bits encode +/-0, +/-infinity or nan (via v_cond_u32). */
+static inline v_u32_t
+zeroinfnan (v_u32_t i)
+{ /* 2 * i shifts the sign bit out; - 1 wraps zero to the maximum value. */
+ return v_cond_u32 (2 * i - 1 >= v_u32 (2 * 0x7f800000lu - 1));
+}
+
+/* Fast implementation of vector atan2f (y, x). Maximum observed error is
+ 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
+ v_atan2f(0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
+ want 0x1.967f00p-1. */
+VPCS_ATTR
+v_f32_t V_NAME (atan2f) (v_f32_t y, v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iy = v_as_u32_f32 (y);
+
+ v_u32_t special_cases = zeroinfnan (ix) | zeroinfnan (iy); /* Either input. */
+
+ v_u32_t sign_x = ix & SignMask;
+ v_u32_t sign_y = iy & SignMask;
+ v_u32_t sign_xy = sign_x ^ sign_y; /* Set when x and y differ in sign. */
+
+ v_f32_t ax = v_abs_f32 (x);
+ v_f32_t ay = v_abs_f32 (y);
+
+ v_u32_t pred_xlt0 = x < 0.0f;
+ v_u32_t pred_aygtax = ay > ax;
+
+ /* Set up z for the polynomial: ay / ax, or -ax / ay when ay > ax. */
+ v_f32_t n = v_sel_f32 (pred_aygtax, -ax, ay);
+ v_f32_t d = v_sel_f32 (pred_aygtax, ay, ax);
+ v_f32_t z = v_div_f32 (n, d);
+
+ /* Shift is -2 if x < 0 (else 0), plus 1 if ay > ax, in units of pi/2. */
+ v_f32_t shift = v_sel_f32 (pred_xlt0, v_f32 (-2.0f), v_f32 (0.0f));
+ shift = v_sel_f32 (pred_aygtax, shift + 1.0f, shift);
+ shift *= PiOver2;
+
+ v_f32_t ret = eval_poly (z, z, shift);
+
+ /* Account for the sign of x and y: flip the result where they differ. */
+ ret = v_as_f32_u32 (v_as_u32_f32 (ret) ^ sign_xy);
+
+ if (unlikely (v_any_u32 (special_cases)))
+ {
+ return specialcase (y, x, ret, special_cases);
+ }
+
+ return ret;
+}
+VPCS_ALIAS
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (V, F, 2, atan2)
+PL_TEST_ULP (V_NAME (atan2f), 2.46)
+PL_TEST_INTERVAL (V_NAME (atan2f), -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (V_NAME (atan2f), -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2f), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2f), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (V_NAME (atan2f), 1e6, 1e32, 40000)
+#endif
diff --git a/pl/math/v_atan_2u5.c b/pl/math/v_atan_2u5.c
new file mode 100644
index 0000000..0f3c2cc
--- /dev/null
+++ b/pl/math/v_atan_2u5.c
@@ -0,0 +1,74 @@
+/*
+ * Double-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "atan_common.h"
+
+#define PiOver2 v_f64 (0x1.921fb54442d18p+0)
+#define AbsMask v_u64 (0x7fffffffffffffff)
+#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */
+#define BigBound 0x434 /* top12(asuint64(0x1p53)). */
+
+/* Fast implementation of vector atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=-1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
+ __v_atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+VPCS_ATTR
+v_f64_t V_NAME (atan) (v_f64_t x)
+{
+ /* Small cases, infs and nans are supported by our approximation technique,
+ but do not set fenv flags correctly. Only trigger special case if we need
+ fenv. */
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t sign = ix & ~AbsMask;
+
+#if WANT_SIMD_EXCEPT
+ v_u64_t ia12 = (ix >> 52) & 0x7ff; /* Exponent field, sign bit dropped. */
+ v_u64_t special = v_cond_u64 (ia12 - TinyBound > BigBound - TinyBound); /* Outside [TinyBound, BigBound]; smaller values wrap. */
+ /* If any lane is special, fall back to the scalar routine for all lanes. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (atan, x, v_f64 (0), v_u64 (-1));
+#endif
+
+ /* Argument reduction, with a = |x|:
+ y := arctan(a) for a <= 1
+ y := pi/2 + arctan(-1/a) for a > 1
+ Hence, use z=-1/a if a>1, otherwise z=a. */
+ v_u64_t red = v_cagt_f64 (x, v_f64 (1.0));
+ /* Avoid dependency on abs(x) in division (and comparison). */
+ v_f64_t z = v_sel_f64 (red, v_div_f64 (v_f64 (-1.0), x), x);
+ v_f64_t shift = v_sel_f64 (red, PiOver2, v_f64 (0.0));
+ /* Use absolute value only when needed (odd powers of z). */
+ v_f64_t az = v_abs_f64 (z);
+ az = v_sel_f64 (red, -az, az); /* -|z| in reduced lanes, |z| elsewhere. */
+
+ /* Calculate the polynomial approximation. */
+ v_f64_t y = eval_poly (z, az, shift);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (atan), 1.78)
+PL_TEST_EXPECT_FENV (V_NAME (atan), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (atan), 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (V_NAME (atan), -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (V_NAME (atan), 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (V_NAME (atan), -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (V_NAME (atan), 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (V_NAME (atan), -0x1p53, -inf, 1000)
+
+#endif
diff --git a/pl/math/v_atanf_3u.c b/pl/math/v_atanf_3u.c
new file mode 100644
index 0000000..67d90b9
--- /dev/null
+++ b/pl/math/v_atanf_3u.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "atanf_common.h"
+
+#define PiOver2 v_f32 (0x1.921fb6p+0f)
+#define AbsMask v_u32 (0x7fffffff)
+#define TinyBound 0x308 /* top12(asuint(0x1p-30)). */
+#define BigBound 0x4e8 /* top12(asuint(0x1p30)). */
+
+#if WANT_SIMD_EXCEPT
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t special)
+{ /* NOTE(review): v_call_f32 presumably keeps y in lanes not set in special. */
+ return v_call_f32 (atanf, x, y, special); /* Scalar atanf fallback. */
+}
+#endif
+
+/* Fast implementation of vector atanf based on
+ atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
+ using z=-1/x and shift = pi/2. Maximum observed error is 2.9 ulps:
+ v_atanf(0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */
+VPCS_ATTR
+v_f32_t V_NAME (atanf) (v_f32_t x)
+{
+ /* Small cases, infs and nans are supported by our approximation technique,
+ but do not set fenv flags correctly. Only trigger special case if we need
+ fenv. */
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t sign = ix & ~AbsMask;
+
+#if WANT_SIMD_EXCEPT
+ v_u32_t ia12 = (ix >> 20) & 0x7ff; /* Top 12 bits of x, sign bit dropped. */
+ v_u32_t special = v_cond_u32 (ia12 - TinyBound > BigBound - TinyBound); /* Outside [TinyBound, BigBound]; smaller values wrap. */
+ /* If any lane is special, fall back to the scalar routine for all lanes. */
+ if (unlikely (v_any_u32 (special)))
+ return specialcase (x, x, v_u32 (-1));
+#endif
+
+ /* Argument reduction, with a = |x|:
+ y := arctan(a) for a <= 1
+ y := pi/2 + arctan(-1/a) for a > 1
+ Hence, use z=-1/a if a>1, otherwise z=a. */
+ v_u32_t red = v_cagt_f32 (x, v_f32 (1.0));
+ /* Avoid dependency on abs(x) in division (and comparison). */
+ v_f32_t z = v_sel_f32 (red, v_div_f32 (v_f32 (-1.0f), x), x);
+ v_f32_t shift = v_sel_f32 (red, PiOver2, v_f32 (0.0f));
+ /* Use absolute value only when needed (odd powers of z). */
+ v_f32_t az = v_abs_f32 (z);
+ az = v_sel_f32 (red, -az, az); /* -|z| in reduced lanes, |z| elsewhere. */
+
+ /* Calculate the polynomial approximation. */
+ v_f32_t y = eval_poly (z, az, shift);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign);
+
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (atanf), 2.5)
+PL_TEST_EXPECT_FENV (V_NAME (atanf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (atanf), 0, 0x1p-30, 5000)
+PL_TEST_INTERVAL (V_NAME (atanf), -0, -0x1p-30, 5000)
+PL_TEST_INTERVAL (V_NAME (atanf), 0x1p-30, 1, 40000)
+PL_TEST_INTERVAL (V_NAME (atanf), -0x1p-30, -1, 40000)
+PL_TEST_INTERVAL (V_NAME (atanf), 1, 0x1p30, 40000)
+PL_TEST_INTERVAL (V_NAME (atanf), -1, -0x1p30, 40000)
+PL_TEST_INTERVAL (V_NAME (atanf), 0x1p30, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (atanf), -0x1p30, -inf, 1000)
+#endif
diff --git a/pl/math/v_atanh_3u5.c b/pl/math/v_atanh_3u5.c
new file mode 100644
index 0000000..bfaf5c2
--- /dev/null
+++ b/pl/math/v_atanh_3u5.c
@@ -0,0 +1,61 @@
+/*
+ * Double-precision vector atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pairwise_horner.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define One 0x3ff0000000000000
+
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t special)
+{ /* NOTE(review): v_call_f64 presumably keeps y in lanes not set in special. */
+ return v_call_f64 (atanh, x, y, special); /* Scalar atanh fallback. */
+}
+
+/* Approximation for vector double-precision atanh(x) using modified log1p.
+ The greatest observed error is 3.31 ULP:
+ __v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+ want 0x1.ffd8ff31b501cp-6. */
+VPCS_ATTR
+v_f64_t V_NAME (atanh) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t sign = ix & ~AbsMask;
+ v_u64_t ia = ix & AbsMask;
+ v_u64_t special = v_cond_u64 (ia >= One); /* |x| >= 1, inf or nan. */
+ v_f64_t halfsign = v_as_f64_u64 (sign | Half); /* 0.5 with the sign of x. */
+
+ /* Mask special lanes with 0 to prevent spurious underflow. */
+ v_f64_t ax = v_sel_f64 (special, v_f64 (0), v_as_f64_u64 (ia));
+ v_f64_t y = halfsign * log1p_inline ((2 * ax) / (1 - ax)); /* atanh(|x|) = 0.5 * log1p(2|x| / (1 - |x|)). */
+
+ if (unlikely (v_any_u64 (special)))
+ return specialcase (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, atanh, -1.0, 1.0)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (atanh))
+PL_TEST_ULP (V_NAME (atanh), 3.32)
+PL_TEST_INTERVAL_C (V_NAME (atanh), 0, 0x1p-23, 10000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanh), -0, -0x1p-23, 10000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanh), 0x1p-23, 1, 90000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanh), -0x1p-23, -1, 90000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanh), 1, inf, 100, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanh), -1, -inf, 100, 0)
+#endif
diff --git a/pl/math/v_atanhf_3u1.c b/pl/math/v_atanhf_3u1.c
new file mode 100644
index 0000000..cd30696
--- /dev/null
+++ b/pl/math/v_atanhf_3u1.c
@@ -0,0 +1,62 @@
+/*
+ * Single-precision vector atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "v_log1pf_inline.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define TinyBound 0x39800000 /* 0x1p-12, below which atanhf(x) rounds to x. */
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+ The maximum error is 3.08 ULP:
+ __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
+ want 0x1.ffcb82p-5. */
+VPCS_ATTR v_f32_t V_NAME (atanhf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_f32_t halfsign /* 0.5 with the sign of x. */
+ = v_as_f32_u32 (v_bsl_u32 (v_u32 (AbsMask), v_u32 (Half), ix));
+ v_u32_t iax = ix & AbsMask;
+
+ v_f32_t ax = v_as_f32_u32 (iax);
+
+#if WANT_SIMD_EXCEPT
+ v_u32_t special = v_cond_u32 ((iax >= One) | (iax <= TinyBound)); /* |x| >= 1, inf, nan, or |x| <= 0x1p-12. */
+ /* Side-step special cases by setting those lanes to 0, which will trigger no
+ exceptions. These will be fixed up later. */
+ if (unlikely (v_any_u32 (special)))
+ ax = v_sel_f32 (special, v_f32 (0), ax);
+#else
+ v_u32_t special = v_cond_u32 (iax >= One); /* |x| >= 1, inf or nan. */
+#endif
+
+ v_f32_t y = halfsign * log1pf_inline ((2 * ax) / (1 - ax)); /* atanh(|x|) = 0.5 * log1p(2|x| / (1 - |x|)). */
+
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (atanhf, x, y, special); /* Scalar atanhf for special lanes. */
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (V_NAME (atanhf), 2.59)
+PL_TEST_EXPECT_FENV (V_NAME (atanhf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), 0, 0x1p-12, 500, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), 0x1p-12, 1, 200000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), 1, inf, 1000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), -0, -0x1p-12, 500, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), -0x1p-12, -1, 200000, 0)
+PL_TEST_INTERVAL_C (V_NAME (atanhf), -1, -inf, 1000, 0)
+#endif
diff --git a/pl/math/v_cbrt_2u.c b/pl/math/v_cbrt_2u.c
new file mode 100644
index 0000000..d5abe41
--- /dev/null
+++ b/pl/math/v_cbrt_2u.c
@@ -0,0 +1,98 @@
+/*
+ * Double-precision vector cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define AbsMask 0x7fffffffffffffff
+#define TwoThirds v_f64 (0x1.5555555555555p-1)
+#define TinyBound 0x001 /* top12 (smallest_normal). */
+#define BigBound 0x7ff /* top12 (infinity). */
+#define MantissaMask v_u64 (0x000fffffffffffff)
+#define HalfExp v_u64 (0x3fe0000000000000)
+
+#define C(i) v_f64 (__cbrt_data.poly[i])
+#define T(i) v_lookup_f64 (__cbrt_data.table, i)
+
+static NOINLINE v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t special)
+{ /* NOTE(review): v_call_f64 presumably keeps y in lanes not set in special. */
+ return v_call_f64 (cbrt, x, y, special); /* Scalar cbrt fallback. */
+}
+
+/* Approximation for double-precision vector cbrt(x), using low-order polynomial
+ and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
+ according to the exponent, for instance an error observed for double value
+ m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
+ integer.
+ __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+ want 0x1.965fe72821e99p+0. */
+VPCS_ATTR v_f64_t V_NAME (cbrt) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t iax = ix & AbsMask;
+ v_u64_t ia12 = iax >> 52; /* Exponent field of |x|. */
+
+ /* Subnormal, +/-0 and special values. */
+ v_u64_t special = v_cond_u64 ((ia12 < TinyBound) | (ia12 >= BigBound));
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0). This is a vector
+ version of frexp, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ v_f64_t m = v_as_f64_u64 (v_bsl_u64 (MantissaMask, iax, HalfExp));
+ v_s64_t e = v_as_s64_u64 (iax >> 52) - 1022;
+
+ /* Calculate rough approximation for cbrt(m) with m in [0.5, 1.0), starting
+ point for Newton iterations. */
+ v_f64_t p_01 = v_fma_f64 (C (1), m, C (0));
+ v_f64_t p_23 = v_fma_f64 (C (3), m, C (2));
+ v_f64_t p = v_fma_f64 (m * m, p_23, p_01);
+
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
+ v_f64_t m_by_3 = m / 3;
+ v_f64_t a = v_fma_f64 (TwoThirds, p, m_by_3 / (p * p)); /* 2p/3 + m / (3 p^2). */
+ a = v_fma_f64 (TwoThirds, a, m_by_3 / (a * a));
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is
+ an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+
+ v_s64_t ey = e / 3; /* Integer division truncates toward zero. */
+ v_f64_t my = a * T (v_as_u64_s64 (e % 3 + 2)); /* Table index in [0, 4]. */
+
+ /* Vector version of ldexp. */
+ v_f64_t y = v_as_f64_u64 ((v_as_u64_s64 (ey + 1023) << 52)) * my;
+ /* Copy sign. */
+ y = v_as_f64_u64 (v_bsl_u64 (v_u64 (AbsMask), v_as_u64_f64 (y), ix));
+
+ if (unlikely (v_any_u64 (special)))
+ return specialcase (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_TEST_ULP (V_NAME (cbrt), 1.30)
+PL_SIG (V, D, 1, cbrt, -10.0, 10.0)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (cbrt))
+PL_TEST_INTERVAL (V_NAME (cbrt), 0, inf, 1000000)
+PL_TEST_INTERVAL (V_NAME (cbrt), -0, -inf, 1000000)
+#endif
diff --git a/pl/math/v_cbrtf_1u5.c b/pl/math/v_cbrtf_1u5.c
new file mode 100644
index 0000000..62fa375
--- /dev/null
+++ b/pl/math/v_cbrtf_1u5.c
@@ -0,0 +1,96 @@
+/*
+ * Single-precision vector cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define AbsMask 0x7fffffff
+#define SignMask v_u32 (0x80000000)
+#define TwoThirds v_f32 (0x1.555556p-1f)
+#define SmallestNormal 0x00800000
+#define MantissaMask 0x007fffff
+#define HalfExp 0x3f000000
+
+#define C(i) v_f32 (__cbrtf_data.poly[i])
+#define T(i) v_lookup_f32 (__cbrtf_data.table, i)
+
+static NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t special)
+{ /* NOTE(review): v_call_f32 presumably keeps y in lanes not set in special. */
+ return v_call_f32 (cbrtf, x, y, special); /* Scalar cbrtf fallback. */
+}
+
+/* Approximation for vector single-precision cbrt(x) using Newton iteration with
+ initial guess obtained by a low-order polynomial. Greatest error is 1.5 ULP.
+ This is observed for every value where the mantissa is 0x1.81410e and the
+ exponent is a multiple of 3, for example:
+ __v_cbrtf(0x1.81410ep+30) got 0x1.255d96p+10
+ want 0x1.255d92p+10. */
+VPCS_ATTR v_f32_t V_NAME (cbrtf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask;
+
+ /* Subnormal, +/-0 and special values. */
+ v_u32_t special = v_cond_u32 ((iax < SmallestNormal) | (iax >= 0x7f800000));
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0). This is a vector
+ version of frexpf, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ v_f32_t m = v_as_f32_u32 ((iax & MantissaMask) | HalfExp);
+ v_s32_t e = v_as_s32_u32 (iax >> 23) - 126;
+
+ /* p is a rough approximation for cbrt(m) with m in [0.5, 1.0). The better
+ this is, the less accurate the next stage of the algorithm needs to be. An
+ order-4 polynomial is enough for one Newton iteration. */
+ v_f32_t p_01 = v_fma_f32 (C (1), m, C (0));
+ v_f32_t p_23 = v_fma_f32 (C (3), m, C (2));
+ v_f32_t p = v_fma_f32 (m * m, p_23, p_01);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ v_f32_t m_by_3 = m / 3;
+ v_f32_t a = v_fma_f32 (TwoThirds, p, m_by_3 / (p * p)); /* 2p/3 + m / (3 p^2). */
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is
+ an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+
+ v_s32_t ey = e / 3; /* Integer division truncates toward zero. */
+ v_f32_t my = a * T (v_as_u32_s32 (e % 3 + 2)); /* Table index in [0, 4]. */
+
+ /* Vector version of ldexpf. */
+ v_f32_t y = v_as_f32_u32 ((v_as_u32_s32 (ey + 127) << 23)) * my;
+ /* Copy sign. */
+ y = v_as_f32_u32 (v_bsl_u32 (SignMask, ix, v_as_u32_f32 (y)));
+
+ if (unlikely (v_any_u32 (special)))
+ return specialcase (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (cbrtf), 1.03)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (cbrtf))
+PL_TEST_INTERVAL (V_NAME (cbrtf), 0, inf, 1000000)
+PL_TEST_INTERVAL (V_NAME (cbrtf), -0, -inf, 1000000)
+#endif
diff --git a/pl/math/v_cosh_2u.c b/pl/math/v_cosh_2u.c
new file mode 100644
index 0000000..0a9fbf8
--- /dev/null
+++ b/pl/math/v_cosh_2u.c
@@ -0,0 +1,96 @@
+/*
+ * Double-precision vector cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "v_exp_tail.h"
+
+#define C1 v_f64 (C1_scal)
+#define C2 v_f64 (C2_scal)
+#define C3 v_f64 (C3_scal)
+#define InvLn2 v_f64 (InvLn2_scal)
+#define Ln2hi v_f64 (Ln2hi_scal)
+#define Ln2lo v_f64 (Ln2lo_scal)
+#define IndexMask v_u64 (IndexMask_scal)
+#define Shift v_f64 (Shift_scal)
+#define Thres v_f64 (Thres_scal)
+
+#define AbsMask 0x7fffffffffffffff
+#define Half v_f64 (0.5)
+#define SpecialBound \
+ 0x4086000000000000 /* 0x1.6p9, above which exp overflows. */
+
+#if V_SUPPORTED
+
+static inline v_f64_t
+exp_inline (v_f64_t x)
+{
+ /* Helper for approximating exp(x). Copied from v_exp_tail, with no
+ special-case handling or tail. */
+
+ /* n = round(x/(ln2/N)). */
+ v_f64_t z = v_fma_f64 (x, InvLn2, Shift);
+ v_u64_t u = v_as_u64_f64 (z);
+ v_f64_t n = z - Shift;
+
+ /* r = x - n*ln2/N. */
+ v_f64_t r = x;
+ r = v_fma_f64 (-Ln2hi, n, r);
+ r = v_fma_f64 (-Ln2lo, n, r);
+
+ v_u64_t e = u << (52 - V_EXP_TAIL_TABLE_BITS);
+ v_u64_t i = u & IndexMask;
+
+ /* y = exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4 (no tail term here). */
+ v_f64_t y = v_fma_f64 (C3, r, C2);
+ y = v_fma_f64 (y, r, C1);
+ y = v_fma_f64 (y, r, v_f64 (1)) * r;
+
+ /* s = 2^(n/N). */
+ u = v_lookup_u64 (Tab, i);
+ v_f64_t s = v_as_f64_u64 (u + e);
+
+ return v_fma_f64 (y, s, s); /* s * (1 + y) ~= exp(x). */
+}
+
+/* Approximation for vector double-precision cosh(x) using exp_inline.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the scalar fall-back region, so is the same
+ as the scalar routine, 1.93 ULP:
+ __v_cosh(0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021
+ want 0x1.fdf28623ef923p+1021.
+
+ The greatest observed error in the non-special region is 1.54 ULP:
+ __v_cosh(0x1.8e205b6ecacf7p+2) got 0x1.f711dcb0c77afp+7
+ want 0x1.f711dcb0c77b1p+7. */
+VPCS_ATTR v_f64_t V_NAME (cosh) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t iax = ix & AbsMask;
+ v_u64_t special = v_cond_u64 (iax > SpecialBound); /* |x| > 0x1.6p9, inf or nan. */
+
+ /* If any inputs are special, fall back to scalar for all lanes. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (cosh, x, x, v_u64 (-1));
+
+ v_f64_t ax = v_as_f64_u64 (iax);
+ /* Up to the point that exp overflows, we can use it to calculate cosh by
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ v_f64_t t = exp_inline (ax);
+ return t * Half + Half / t;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (cosh), 1.43)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (cosh))
+PL_TEST_INTERVAL (V_NAME (cosh), 0, 0x1.6p9, 100000)
+PL_TEST_INTERVAL (V_NAME (cosh), -0, -0x1.6p9, 100000)
+PL_TEST_INTERVAL (V_NAME (cosh), 0x1.6p9, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (cosh), -0x1.6p9, -inf, 1000)
+#endif
diff --git a/pl/math/v_coshf_2u4.c b/pl/math/v_coshf_2u4.c
new file mode 100644
index 0000000..1422d4d
--- /dev/null
+++ b/pl/math/v_coshf_2u4.c
@@ -0,0 +1,74 @@
+/*
+ * Single-precision vector cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */
+#define SpecialBound \
+ 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \
+ special case. */
+#define Half v_f32 (0.5)
+
+#if V_SUPPORTED
+
+v_f32_t V_NAME (expf) (v_f32_t);
+
+/* Single-precision vector cosh, using vector expf.
+ Maximum error is 2.38 ULP:
+ __v_coshf(0x1.e8001ep+1) got 0x1.6a491ep+4 want 0x1.6a4922p+4. */
+VPCS_ATTR v_f32_t V_NAME (coshf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask;
+ v_f32_t ax = v_as_f32_u32 (iax);
+ v_u32_t special = v_cond_u32 (iax >= SpecialBound); /* |x| >= 0x1.5a92d8p+6, inf or nan. */
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to the scalar
+ variant for all inputs if any input is a special value or above the bound
+ at which expf overflows. */
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (coshf, x, x, v_u32 (-1));
+
+ v_u32_t tiny = v_cond_u32 (iax <= TinyBound); /* |x| <= 0x1p-63. */
+ /* If any input is tiny, avoid underflow exception by fixing tiny lanes of
+ input to 1, which will generate no exceptions, and then also fixing tiny
+ lanes of output to 1 just before return. */
+ if (unlikely (v_any_u32 (tiny)))
+ ax = v_sel_f32 (tiny, v_f32 (1), ax);
+#endif
+
+ /* Calculate cosh by exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ v_f32_t t = V_NAME (expf) (ax);
+ v_f32_t y = t * Half + Half / t;
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (tiny)))
+ return v_sel_f32 (tiny, v_f32 (1), y);
+#else
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (coshf, x, y, special); /* Scalar coshf for special lanes. */
+#endif
+
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (coshf), 1.89)
+PL_TEST_EXPECT_FENV (V_NAME (coshf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (coshf), 0, 0x1p-63, 100)
+PL_TEST_INTERVAL (V_NAME (coshf), 0, 0x1.5a92d8p+6, 80000)
+PL_TEST_INTERVAL (V_NAME (coshf), 0x1.5a92d8p+6, inf, 2000)
+PL_TEST_INTERVAL (V_NAME (coshf), -0, -0x1p-63, 100)
+PL_TEST_INTERVAL (V_NAME (coshf), -0, -0x1.5a92d8p+6, 80000)
+PL_TEST_INTERVAL (V_NAME (coshf), -0x1.5a92d8p+6, -inf, 2000)
+#endif
diff --git a/pl/math/v_erf_2u.c b/pl/math/v_erf_2u.c
new file mode 100644
index 0000000..1d7ddbb
--- /dev/null
+++ b/pl/math/v_erf_2u.c
@@ -0,0 +1,116 @@
+/*
+ * Double-precision vector erf(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define AbsMask v_u64 (0x7fffffffffffffff)
+#define AbsXMax v_f64 (0x1.8p+2)
+#define Scale v_f64 (0x1p+3)
+
+/* Special cases (fall back to scalar erf calls for the lanes set in cmp). */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{ /* NOTE(review): v_call_f64 presumably keeps y in lanes not set in cmp. */
+ return v_call_f64 (erf, x, y, cmp);
+}
+
+/* Per-lane polynomial coefficients and shift, as filled in by lookup. */
+struct entry
+{
+ v_f64_t P[V_ERF_NCOEFFS]; /* P[j] holds coeffs[j][i] for each lane's i. */
+ v_f64_t shift; /* Entry of __v_erf_data.shifts for each lane's i. */
+};
+
+static inline struct entry /* Coefficients and shift for interval index i. */
+lookup (v_u64_t i)
+{
+ struct entry e;
+#ifdef SCALAR
+ for (int j = 0; j < V_ERF_NCOEFFS; ++j)
+ e.P[j] = __v_erf_data.coeffs[j][i]; /* Table is stored transposed. */
+ e.shift = __v_erf_data.shifts[i];
+#else
+ for (int j = 0; j < V_ERF_NCOEFFS; ++j)
+ {
+ e.P[j][0] = __v_erf_data.coeffs[j][i[0]]; /* Lane 0 uses index i[0]. */
+ e.P[j][1] = __v_erf_data.coeffs[j][i[1]]; /* Lane 1 uses index i[1]. */
+ }
+ e.shift[0] = __v_erf_data.shifts[i[0]];
+ e.shift[1] = __v_erf_data.shifts[i[1]];
+#endif
+ return e;
+}
+
+/* Optimized double precision vector error function erf. Maximum
+ observed error is 1.75 ULP, in [0.110, 0.111]:
+ verf(0x1.c5e0c2d5d0543p-4) got 0x1.fe0ed62a54987p-4
+ want 0x1.fe0ed62a54985p-4. */
+VPCS_ATTR
+v_f64_t V_NAME (erf) (v_f64_t x)
+{
+ /* Handle both inf/nan as well as small values (|x|<2^-28).
+ If either condition holds in any lane then a loop over
+ scalar calls will be performed. */
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t atop = (ix >> 48) & v_u64 (0x7fff); /* Top 16 bits, sign dropped. */
+ v_u64_t special_case /* atop < 0x3e30 wraps, so tiny lanes are flagged too. */
+ = v_cond_u64 (atop - v_u64 (0x3e30) >= v_u64 (0x7ff0 - 0x3e30));
+
+ /* Get sign and absolute value. */
+ v_u64_t sign = v_as_u64_f64 (x) & ~AbsMask;
+ v_f64_t a = v_min_f64 (v_abs_f64 (x), AbsXMax);
+
+ /* Compute index by truncating 8 * a with a=|x| saturated to 6.0. */
+
+#ifdef SCALAR
+ v_u64_t i = v_trunc_u64 (a * Scale);
+#else
+ v_u64_t i = vcvtq_n_u64_f64 (a, 3); /* Fixed-point convert, 3 fraction bits. */
+#endif
+ /* Get polynomial coefficients and shift parameter using lookup. */
+ struct entry dat = lookup (i);
+
+ /* Evaluate polynomial on transformed argument. */
+ v_f64_t z = v_fma_f64 (a, Scale, dat.shift); /* Shift is -i for most intervals in __v_erf_data, giving 8 * a - i. */
+
+ v_f64_t r1 = v_fma_f64 (z, dat.P[1], dat.P[0]);
+ v_f64_t r2 = v_fma_f64 (z, dat.P[3], dat.P[2]);
+ v_f64_t r3 = v_fma_f64 (z, dat.P[5], dat.P[4]);
+ v_f64_t r4 = v_fma_f64 (z, dat.P[7], dat.P[6]);
+ v_f64_t r5 = v_fma_f64 (z, dat.P[9], dat.P[8]);
+
+ v_f64_t z2 = z * z;
+ v_f64_t y = v_fma_f64 (z2, r5, r4); /* Combine the pairs with Horner steps in z^2. */
+ y = v_fma_f64 (z2, y, r3);
+ y = v_fma_f64 (z2, y, r2);
+ y = v_fma_f64 (z2, y, r1);
+
+ /* y=erf(x) if x>0, -erf(-x) otherwise. */
+ y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign);
+
+ if (unlikely (v_any_u64 (special_case)))
+ return specialcase (x, y, special_case);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, erf, -6.0, 6.0)
+PL_TEST_ULP (V_NAME (erf), 1.26)
+PL_TEST_INTERVAL (V_NAME (erf), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (V_NAME (erf), 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erf), -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erf), 0x1p-26, 0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erf), -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erf), 0, inf, 40000)
+#endif
diff --git a/pl/math/v_erf_data.c b/pl/math/v_erf_data.c
new file mode 100644
index 0000000..7bbb281
--- /dev/null
+++ b/pl/math/v_erf_data.c
@@ -0,0 +1,119 @@
+/*
+ * Polynomial coefficients and shifts for double-precision erf(x) vector
+ * function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* 48 intervals of the form [x_i, x_{i+1}] with x_i = i / 8 for
+ i=1,...,47 (x_0 = 2^-1022). There is an extra dummy interval for
+ [6, +inf] with all coeffs = 0 except for P_0 = 1.0, as erf(x) == 1
+ above 6.
+
+ Coefficients for each interval generated using fpminimax algorithm. See
+ v_erf.sollya for details. Note the array is transposed, so for a set of
+ coefficients C generated on interval i, C[j] is at coeffs[j][i]. */
+
+const struct v_erf_data __v_erf_data
+ = {.shifts
+ = {-0x1p-1019, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12,
+ -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25,
+ -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38,
+ -39, -40, -41, -42, -43, -44, -45, -46, -47, 0},
+ .coeffs = {
+ // clang-format off
+
+{0x1.20dd750429b6dp-1022, 0x1.1f5e1a35c3b8ap-3, 0x1.1af54e232d609p-2, 0x1.9dd0d2b721f39p-2, 0x1.0a7ef5c18edd2p-1, 0x1.3f196dcd0f135p-1,
+ 0x1.6c1c9759d0e5fp-1, 0x1.91724951b8fc6p-1, 0x1.af767a741088bp-1, 0x1.c6dad2829ec62p-1, 0x1.d8865d98abe00p-1, 0x1.e5768c3b4a3fcp-1,
+ 0x1.eea5557137ae0p-1, 0x1.f4f693b67bd77p-1, 0x1.f92d077f8d56dp-1, 0x1.fbe61eef4cf6ap-1, 0x1.fd9ae142795e3p-1, 0x1.fea4218d6594ap-1,
+ 0x1.ff404760319b4p-1, 0x1.ff9960f3eb327p-1, 0x1.ffcaa8f4c9beap-1, 0x1.ffe514bbdc197p-1, 0x1.fff2cfb0453d9p-1, 0x1.fff9ba420e834p-1,
+ 0x1.fffd1ac4135f9p-1, 0x1.fffeb3ebb267bp-1, 0x1.ffff6f9f67e55p-1, 0x1.ffffc316d9ed0p-1, 0x1.ffffe710d565ep-1, 0x1.fffff618c3da6p-1,
+ 0x1.fffffc2f171e3p-1, 0x1.fffffe92ced93p-1, 0x1.ffffff7b91176p-1, 0x1.ffffffd169d0cp-1, 0x1.fffffff01a8b6p-1, 0x1.fffffffabd229p-1,
+ 0x1.fffffffe4fa30p-1, 0x1.ffffffff79626p-1, 0x1.ffffffffd759dp-1, 0x1.fffffffff4188p-1, 0x1.fffffffffc9e8p-1, 0x1.ffffffffff11ap-1,
+ 0x1.ffffffffffc05p-1, 0x1.ffffffffffef8p-1, 0x1.fffffffffffbep-1, 0x1.ffffffffffff0p-1, 0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-1, 1.0},
+
+{0x1.20dd750429b6dp-3, 0x1.1c62fa1e86989p-3, 0x1.0f5d1602f7dfbp-3, 0x1.f5f0cdaf152b2p-4, 0x1.c1efca49a5051p-4, 0x1.86e9694134b22p-4,
+ 0x1.492e42d78d39cp-4, 0x1.0cab61f084b1bp-4, 0x1.a911f096fbb79p-5, 0x1.45e99bcbb78d4p-5, 0x1.e4652fadcbaa3p-6, 0x1.5ce595c455bccp-6,
+ 0x1.e723726b81ff1p-7, 0x1.499d478bca4acp-7, 0x1.b055303221566p-8, 0x1.12ceb37ffa389p-8, 0x1.529b9e8cfa59fp-9, 0x1.94624e78e084fp-10,
+ 0x1.d4143a9e023f5p-11, 0x1.06918b63537c2p-11, 0x1.1d83170fcc34bp-12, 0x1.2ce898808f08ep-13, 0x1.3360ccd26e06ap-14, 0x1.30538fbb986fbp-15,
+ 0x1.2408e9bb1b657p-16, 0x1.0f9e1b4e4baaep-17, 0x1.e9b5e8d71b5e3p-19, 0x1.abe09e85af38ap-20, 0x1.6a5972347c568p-21, 0x1.296a70eff1bd9p-22,
+ 0x1.d9371ee6bfc07p-24, 0x1.6ce1a88a01b3ap-25, 0x1.10b14985663f9p-26, 0x1.8b0d07ade43d8p-28, 0x1.155a098eceb0fp-29, 0x1.7974d3b397e7cp-31,
+ 0x1.f1e3bf5a6493ap-33, 0x1.3e47781d91b97p-34, 0x1.8a7038368986cp-36, 0x1.d9d4d7be5992cp-38, 0x1.137dabebc1319p-39, 0x1.367541123e46cp-41,
+ 0x1.58007ab162c1dp-43, 0x1.709f0d280b3f5p-45, 0x1.30a3dcf531ebfp-47, 0x1.d2707c055dedcp-50, 0x1.0d97f61945387p-49, 0x1.1dbc3ab728933p-50, 0},
+
+{0x1.2411381609db0p-51, -0x1.1c62fa1e75c0ap-9, -0x1.0f5d1602eb436p-8, -0x1.78749a4346714p-8, -0x1.c1efca49a7b15p-8, -0x1.e8a3c39178d95p-8,
+ -0x1.edc5644363883p-8, -0x1.d62beb64e19eep-8, -0x1.a911f096f7a87p-8, -0x1.6ea6cf452dca3p-8, -0x1.2ebf3dccb166cp-8, -0x1.dfbbadedfcde6p-9,
+ -0x1.6d5a95d08c346p-9, -0x1.0bcfca21880c9p-9, -0x1.7a4a8a2bf1a0bp-10, -0x1.01a1c8481a466p-10, -0x1.529b9e8d29ddap-11, -0x1.ada873604cf20p-12,
+ -0x1.074b60f960c25p-12, -0x1.37ccd585732c6p-13, -0x1.64e3dcd73a1d3p-14, -0x1.8af14827e93bap-15, -0x1.a6a519ae712fbp-16, -0x1.b5781ea681265p-17,
+ -0x1.b60d5ed744563p-18, -0x1.a8670acc75c29p-19, -0x1.8de3ce2154088p-20, -0x1.690584329096ap-21, -0x1.3d0e478659a54p-22, -0x1.0d8875cb088d0p-23,
+ -0x1.bba3c56e56d69p-25, -0x1.617a60b4bcd87p-26, -0x1.10b16afb9ce08p-27, -0x1.9766e11f62828p-29, -0x1.26afbc55ef33cp-30, -0x1.9cd52c0e709a9p-32,
+ -0x1.18175f6758766p-33, -0x1.705a68dde7f3ap-35, -0x1.d65ba6d52556dp-37, -0x1.23af5c3865987p-38, -0x1.51c72cd64a6bcp-40, -0x1.79f63bbc02f5ap-42,
+ -0x1.2346f2840d7bfp-43, -0x1.8110f614395a8p-45, 0x1.c3309f1fe85a4p-46, 0x1.09e6fb6ee0b85p-46, -0x1.959834938224fp-46, -0x1.0e9a684ecee47p-46, 0},
+
+{-0x1.812746b057b58p-11, -0x1.6f552dbf96b31p-11, -0x1.3c97445cee1b0p-11, -0x1.e106c523a966dp-12, -0x1.2bf5318638e21p-12, -0x1.c8105034ea92fp-14,
+ 0x1.b6e85963275c5p-15, 0x1.7c9d756585d29p-13, 0x1.1b614b0e78122p-12, 0x1.4cb3cf0b42031p-12, 0x1.571d01cf7eeb3p-12, 0x1.4374d82fe7f2ep-12,
+ 0x1.1c2a02b9199a0p-12, 0x1.d6631e131dabap-13, 0x1.7148c3d9d22bap-13, 0x1.143d1c76ae7c6p-13, 0x1.8b0ae3afc07e6p-14, 0x1.0ea475d5b3822p-14,
+ 0x1.63ef6208bd4adp-15, 0x1.c1ec100ec3e71p-16, 0x1.119da13709716p-16, 0x1.407fbd00318a5p-17, 0x1.69cf481b4666cp-18, 0x1.89e17d2b19c42p-19,
+ 0x1.9db7531fa76f6p-20, 0x1.a37382bd61dc8p-21, 0x1.9aa4a8e8fe8dfp-22, 0x1.8451fcde36f23p-23, 0x1.62cd605193fe9p-24, 0x1.394b0d46af85cp-25,
+ 0x1.0b6c0d1191ec9p-26, 0x1.b9581bcc8f4ebp-28, 0x1.603ea0f602119p-29, 0x1.0ff28bc88022cp-30, 0x1.95ecc71a0b4bep-32, 0x1.24ffe516534d4p-33,
+ 0x1.9aa89abeffd90p-35, 0x1.1ab57210158fap-36, 0x1.8b0c503eafbcbp-38, 0x1.166413b8ba611p-39, 0x1.5848fad1e38e9p-42, 0x1.3573cc6d6d4e6p-49,
+ 0x1.404c0dc8b5ffcp-42, 0x1.38779160f5f11p-43, -0x1.1dc84293acf27p-42, -0x1.2892755467252p-43, 0x1.8e40aed4a9e02p-43, 0x1.0cef3bce98bedp-43, 0},
+
+{0x1.4ade8e6d47ef0p-43, 0x1.196c9ee6491cfp-16, 0x1.040e8be6a9625p-15, 0x1.5529ad049b967p-15, 0x1.76f27e1744b44p-15, 0x1.6963c95cd8395p-15,
+ 0x1.349b5d6ae76a6p-15, 0x1.cc6056b95eed3p-16, 0x1.1b614adacb10dp-16, 0x1.ca5080f4ec9b9p-18, -0x1.93a9d54fb750bp-20, -0x1.f3b8d7695d38cp-18,
+ -0x1.6d5a929bfde5fp-17, -0x1.974c013452be9p-17, -0x1.8a0da620ab60fp-17, -0x1.5a3166e1f5682p-17, -0x1.1a2c5ad80a584p-17, -0x1.afe552a6507eep-18,
+ -0x1.38a9879a760b8p-18, -0x1.ae595d5041755p-19, -0x1.1a89c93c4b9c8p-19, -0x1.62d4c3dc10fdbp-20, -0x1.ab0c620cf63d1p-21, -0x1.ed4aeff35fd90p-22,
+ -0x1.11c8e63fae76dp-22, -0x1.2454a1fb4749ap-23, -0x1.2c7f7846b0e7bp-24, -0x1.298c17acfd63ap-25, -0x1.1c0f6cc5baa18p-26, -0x1.0574c9f0e63fap-27,
+ -0x1.d0a5c4232f4cep-29, -0x1.8d9d301253af8p-30, -0x1.49cb78be34c81p-31, -0x1.08fc30eb50526p-32, -0x1.96e2f50cad458p-34, -0x1.2c888ddad994bp-35,
+ -0x1.c5dd3068e7fcap-37, -0x1.935b876ed56ffp-38, -0x1.e74a7c256ba0dp-39, -0x1.1681c73733b50p-39, 0x1.855ab0b8664dep-41, 0x1.4aebdf7fb67e5p-41,
+ -0x1.2aef07c393759p-40, -0x1.37e52b17505e6p-41, 0x1.394b997da7ed5p-40, 0x1.4345440ea9876p-41, -0x1.af227669dca68p-41, -0x1.23589e4f3cc49p-41, 0},
+
+{0x1.ce2f1b1646d4bp-19, 0x1.aaba29a029bd5p-19, 0x1.47e57fbf662a0p-19, 0x1.74882f55f1bd4p-20, 0x1.dfed759bd9091p-23, -0x1.c124b2acb3ee8p-21,
+ -0x1.b429a82901889p-20, -0x1.1350ee93fbfb3p-19, -0x1.1b613a5e1e196p-19, -0x1.f65ceb61aa63ap-20, -0x1.82814da1daaa1p-20, -0x1.f5729185c040ep-21,
+ -0x1.e72489bfea503p-22, -0x1.17d784c065f21p-24, 0x1.b2229e5122850p-23, 0x1.779b916c44358p-22, 0x1.ace7a08f66cb0p-22, 0x1.9973788b8f181p-22,
+ 0x1.5d3bceb9c39d5p-22, 0x1.11da976499339p-22, 0x1.90eaa0d25df91p-23, 0x1.146c19a9f0ae8p-23, 0x1.693a52f5ccd0bp-24, 0x1.c122683fc1404p-25,
+ 0x1.0a866e311e50ap-25, 0x1.2e85588e08741p-26, 0x1.493501a3ee15cp-27, 0x1.572eec204dc18p-28, 0x1.590e0157d4dabp-29, 0x1.4c0619d7359e8p-30,
+ 0x1.36608b7b22d22p-31, 0x1.0e3f514a0d7fep-32, 0x1.e04d29135056ep-34, 0x1.aa936eb977e33p-35, 0x1.3ce1ec4a299b6p-36, 0x1.aba42bc751130p-38,
+ 0x1.0861b5dc819e3p-38, 0x1.3bc7b1f0f8afbp-38, 0x1.7d6c896bf3579p-38, 0x1.14f24be91338cp-38, -0x1.2896024cf2ca9p-39, -0x1.c2e8399d1e8e7p-40,
+ 0x1.7836a61cc0f4bp-39, 0x1.8a98e07f8cdfcp-40, -0x1.8f332379c6ce4p-39, -0x1.9bbec3ab83755p-40, 0x1.126c9c6d24bd6p-39, 0x1.72eaeac065cc2p-40, 0},
+
+{0x1.240b25b9a9823p-39, -0x1.733f879c52150p-24, -0x1.4c00873f3742fp-23, -0x1.9a6fe48163775p-23, -0x1.99ed7481d2399p-23, -0x1.52aea61425cf7p-23,
+ -0x1.b853c3ad1c781p-24, -0x1.53c3e486c1845p-25, 0x1.2e2a4e7a0286dp-26, 0x1.fd0e266132929p-25, 0x1.5cf1d8fe5611fp-24, 0x1.6b140ba72ac56p-24,
+ 0x1.3cab2fa73a9c4p-24, 0x1.d864967df5009p-25, 0x1.25b4551256078p-25, 0x1.0d029bc50b0cdp-26, 0x1.e126485c5dceep-30, -0x1.dd5e4bed818c0p-28,
+ -0x1.7cd1b44dbfdc3p-27, -0x1.981def704f39ep-27, -0x1.6f0e87a0f3e35p-27, -0x1.267c0dc9b6e95p-27, -0x1.b2ec3078bf153p-28, -0x1.2b066605239f5p-28,
+ -0x1.840473ed3d070p-29, -0x1.daf9b9b8c06cap-30, -0x1.1661520cf8a32p-30, -0x1.2fa49c29e30b5p-31, -0x1.4ddfd9d6a7cf4p-32, -0x1.4a55b8564425ap-33,
+ -0x1.5df1ca746f291p-34, -0x1.dd6b8d1ec2e4fp-36, -0x1.34c63d902f888p-36, -0x1.b55b65a1655c0p-37, -0x1.9c1cfd1e2142cp-39, 0x1.98f2b73f288c4p-43,
+ -0x1.3baba91a10af8p-39, -0x1.8cb03e5359e2bp-38, -0x1.16063ce2129afp-37, -0x1.9fd74120d8e00p-38, 0x1.cf0caf7defe71p-39, 0x1.5d029f324f3a7p-39,
+ -0x1.21268c2290cb5p-38, -0x1.2f6de12d74afdp-39, 0x1.332ead763d55ap-38, 0x1.3cd3a7103e138p-39, -0x1.a64e5d1cdb028p-39, -0x1.1d674b3db2a42p-39, 0},
+
+{-0x1.b84a0abf33534p-27, -0x1.89c6cd0cf2b65p-27, -0x1.09bb37091d4aep-27, -0x1.68f777b72ca95p-29, 0x1.60a5240c5ece1p-29, 0x1.c7421c28ef551p-28,
+ 0x1.2e75b6acb2116p-27, 0x1.30f14412b258cp-27, 0x1.f153992d28a09p-28, 0x1.3b80153a3c97bp-28, 0x1.df36fe4b5094cp-30, -0x1.724a2b185f507p-31,
+ -0x1.37cb36ce4237dp-29, -0x1.963d70f677f90p-29, -0x1.8d5c135b0af66p-29, -0x1.42fbc01c11a3bp-29, -0x1.baba060b7adb1p-30, -0x1.eaf481fbc6feap-31,
+ -0x1.5b5d0a354e49cp-32, 0x1.fb57bbdb6f854p-35, 0x1.2423823b5dcaep-32, 0x1.64e9c7f44ececp-32, 0x1.59b6fb115bcefp-32, 0x1.179a1737c24d9p-32,
+ 0x1.a9515bcf95bb0p-33, 0x1.1ca83baba64bdp-33, 0x1.826e7ef89b3cap-34, 0x1.7ab5cb5ca2db0p-35, 0x1.2ce997226e82dp-35, 0x1.fdd14ca5a6d38p-37,
+ 0x1.d35252de2a363p-37, -0x1.8dd5e799b3695p-39, 0x1.047fd46786432p-38, 0x1.aa8639c65a4a4p-38, 0x1.10495d2cdaee5p-41, -0x1.24b2b7e751230p-40,
+ 0x1.e2ec0b9e9b211p-40, 0x1.6203cc50754ffp-38, 0x1.f95c0def7238bp-38, 0x1.7b31a463405b9p-38, -0x1.a826fa90b3c96p-39, -0x1.3f6315812b719p-39,
+ 0x1.0862d42832ac6p-38, 0x1.1575d5fa4614cp-39, -0x1.18eb527929cedp-38, -0x1.21bd844e0e3b8p-39, 0x1.8233e415548a0p-39, 0x1.0501b16f5819bp-39, 0},
+
+{0x1.9b4497171a29dp-39, 0x1.7f9c0bcd4b3e7p-32, 0x1.4928133bccac3p-31, 0x1.7b5a70f49485bp-31, 0x1.4f71ee2c4aff3p-31, 0x1.bca22e6a9cd38p-32,
+ 0x1.1c93a34970852p-33, -0x1.03d86c164d20cp-33, -0x1.448222383eb95p-32, -0x1.95aa76b3417ddp-32, -0x1.80448ecd34689p-32, -0x1.19d3f547d1f1fp-32,
+ -0x1.2c65995a6a63fp-33, -0x1.01b5832823cc6p-35, 0x1.97d70f56a4524p-35, 0x1.7d57df58d20a9p-34, 0x1.a3d6fe32773b9p-34, 0x1.6ff53581ac827p-34,
+ 0x1.faff84d277a6fp-35, 0x1.39ff19e23455bp-35, 0x1.9b1e383b8e03dp-37, 0x1.fd37bce839816p-40, -0x1.31b58a910d109p-37, -0x1.480a28743a67fp-37,
+ -0x1.9a8b926ca51b4p-37, -0x1.14d6b0b9c8256p-37, -0x1.227dfd10a7f51p-37, -0x1.d1d5ba9e5676cp-42, -0x1.71c57d72b90eap-38, -0x1.018922e3bb1eap-40,
+ -0x1.e0970faab38e6p-39, 0x1.a442b8ab5ed33p-39, -0x1.3a6f0acbd7293p-40, -0x1.7c53be7062a3ap-39, -0x1.c562622693573p-44, 0x1.458e668db57cdp-41,
+ -0x1.d5f41a61e90a0p-41, -0x1.60d1f7c57cb11p-39, -0x1.f8fa4c98324fep-39, -0x1.7b178840b90e3p-39, 0x1.a8558cdf5220ap-40, 0x1.3f7acb241cdbbp-40,
+ -0x1.086dc81118428p-39, -0x1.15828db8b2da6p-40, 0x1.18f9d5a5099c3p-39, 0x1.21cd05249b8c9p-40, -0x1.82493a2d7a1fep-40, -0x1.0510a8a58c1abp-40, 0},
+
+{0x1.4c0cf8eccd2e0p-35, 0x1.de696ed8004cbp-36, 0x1.62392d5363e58p-37, -0x1.21d68e1a8e4c7p-37, -0x1.867b57075ec9dp-36, -0x1.058af4c30abafp-35,
+ -0x1.dbb6594ed5127p-36, -0x1.6006d1f354794p-36, -0x1.311e96adfec96p-37, 0x1.2c82e5ef56703p-39, 0x1.6f2c1413cbe8ep-37, 0x1.c46886dd6c5d6p-37,
+ 0x1.92e273bf63d54p-37, 0x1.2982faf5df034p-37, 0x1.5ad37b1dc30c4p-38, 0x1.97104fd2630f8p-40, -0x1.38bcd955ecbb9p-40, -0x1.7779727d36c91p-39,
+ -0x1.4862c13c3ccf5p-39, -0x1.53facd6319433p-39, -0x1.de2f6e88b0926p-41, -0x1.fb0967f0fa611p-41, 0x1.5fadb405af344p-42, 0x1.e90319ef64411p-43,
+ 0x1.fc013fac4d3d7p-41, 0x1.0546d08a05cacp-41, 0x1.fa1b10c35012ep-41, -0x1.000d4354b8049p-41, 0x1.b68ee44b2b84bp-41, 0x1.cfa36d83ea2afp-48,
+ 0x1.5c41a6c8aaf3ap-41, -0x1.7edb2342ceb28p-41, 0x1.d9211942a37d9p-43, 0x1.39b815d399ba2p-41, 0x1.1fc46969db91bp-46, -0x1.1736507c25bafp-43,
+ 0x1.89bbcfdb5c677p-43, 0x1.28f22b295bc86p-41, 0x1.a9396e0b45a3bp-41, 0x1.3f409ac2dbfafp-41, -0x1.65682520f07a7p-42, -0x1.0d1586492d3b1p-42,
+ 0x1.bd6c9f236abc3p-42, 0x1.d376a4bd795bep-43, -0x1.d94e87dd31275p-42, -0x1.e82d04ff5649fp-43, 0x1.455b18d5d810fp-42, 0x1.b7c6a4ab711bdp-43, 0}
+ // clang-format on
+ }};
diff --git a/pl/math/v_erfc_4u.c b/pl/math/v_erfc_4u.c
new file mode 100644
index 0000000..c306351
--- /dev/null
+++ b/pl/math/v_erfc_4u.c
@@ -0,0 +1,168 @@
+/*
+ * Double-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "horner.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+/* Accurate exponential (vector variant of exp_dd). */
+v_f64_t V_NAME (exp_tail) (v_f64_t, v_f64_t);
+
+#define One v_f64 (1.0)
+#define AbsMask v_u64 (0x7fffffffffffffff)
+#define Scale v_f64 (0x1.0000002p27)
+
+/* Coeffs for polynomial approximation on [0x1p-28, 31]. */
+#define PX __v_erfc_data.poly
+#define xint __v_erfc_data.interval_bounds
+
+/* Scalar fallback for special lanes: v_call_f64 is given the scalar erfc,
+   the input x, the vector result y computed so far and the lane mask cmp.  */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+  v_f64_t fixed = v_call_f64 (erfc, x, y, cmp);
+  return fixed;
+}
+
+/* Bundle of table values gathered by lookup (): the polynomial
+ coefficients and the interval bound selected for each lane. */
+struct entry
+{
+ /* Coefficients P[0..ERFC_POLY_ORDER], read from __v_erfc_data.poly. */
+ v_f64_t P[ERFC_POLY_ORDER + 1];
+ /* Interval bound, read from __v_erfc_data.interval_bounds. */
+ v_f64_t xi;
+};
+
+/* Gather, for every lane index held in i, the coefficients of the selected
+   polynomial (PX) and the matching interval bound (xint).  */
+static inline struct entry
+lookup (v_u64_t i)
+{
+  struct entry res;
+
+  for (int k = 0; k <= ERFC_POLY_ORDER; ++k)
+    {
+#ifdef SCALAR
+      /* Scalar build: i is a plain index.  */
+      res.P[k] = PX[i][k];
+#else
+      /* Vector build: fill lanes 0 and 1 one at a time.  */
+      res.P[k][0] = PX[i[0]][k];
+      res.P[k][1] = PX[i[1]][k];
+#endif
+    }
+
+#ifdef SCALAR
+  res.xi = xint[i];
+#else
+  res.xi[0] = xint[i[0]];
+  res.xi[1] = xint[i[1]];
+#endif
+  return res;
+}
+
+/* Accurate evaluation of exp(-a^2).  The rounded square a * a is paired with
+   a correction term obtained by splitting a into a high and a low part; both
+   are handed to the exp_tail routine, which takes the small correction as
+   its second argument.  */
+static inline v_f64_t
+v_eval_gauss (v_f64_t a)
+{
+  const v_f64_t sq = a * a;
+
+  /* Split a into hi + lo using the Scale constant.  */
+  v_f64_t hi = -v_fma_f64 (Scale, a, -a);
+  hi = v_fma_f64 (Scale, a, hi);
+  const v_f64_t lo = a - hi;
+
+  /* Accumulate sq - (hi + lo)^2 one partial product at a time.  The order
+     of the FMAs is kept unchanged because it determines the rounding.  */
+  v_f64_t corr = v_fma_f64 (-hi, hi, sq);
+  corr = v_fma_f64 (-hi, lo, corr);
+  corr = v_fma_f64 (-lo, hi, corr);
+  corr = v_fma_f64 (-lo, lo, corr);
+
+  /* Evaluate exp(-sq + corr), where corr << sq.  */
+  return V_NAME (exp_tail) (-sq, corr);
+}
+
+/* Optimized double precision vector complementary error function erfc.
+   Maximum measured error is 3.64 ULP:
+   __v_erfc(0x1.4792573ee6cc7p+2) got 0x1.ff3f4c8e200d5p-42
+   want 0x1.ff3f4c8e200d9p-42.
+
+   Lanes flagged in cmp (biased exponent below 0x3cd, or inf/nan) are
+   recomputed with the scalar erfc through specialcase ().  */
+VPCS_ATTR
+v_f64_t V_NAME (erfc) (v_f64_t x)
+{
+  v_f64_t z, p, y;
+  v_u64_t ix, atop, sign, i, cmp;
+
+  ix = v_as_u64_f64 (x);
+  /* Compute fac as early as possible in order to get best performance.
+     Moving the sign bit to bit 62 gives the bit pattern of 2.0 for inputs
+     with the sign bit set and of 0.0 otherwise.  */
+  v_f64_t fac = v_as_f64_u64 ((ix >> 63) << 62);
+  /* Use the 11 exponent bits for small, nan and inf case detection.  The
+     unsigned subtraction wraps, so one compare covers both ends.  */
+  atop = (ix >> 52) & 0x7ff;
+  cmp = v_cond_u64 (atop - v_u64 (0x3cd) >= v_u64 (0x7ff - 0x3cd));
+
+  struct entry dat;
+
+  /* Short path when all entries of the vector are out of bounds.  0x404 is
+     the biased exponent of 32, the smallest power of two above 28.  */
+  v_u64_t out_of_bounds = v_cond_u64 (atop >= v_u64 (0x404));
+
+  if (v_all_u64 (out_of_bounds))
+    {
+      /* inf and nan also pass the out-of-bounds test.  A nan lane must not
+	 be returned as 0 or 2, so hand special lanes to the scalar routine
+	 and keep fac for the others.  */
+      if (unlikely (v_any_u64 (cmp)))
+	return specialcase (x, fac, cmp);
+      /* Use sign to produce either 0 if x > 0, 2 otherwise.  */
+      return fac;
+    }
+
+  /* erfc(|x|) = P(|x|-x_i)*exp(-x^2).  */
+
+  v_f64_t a = v_abs_f64 (x);
+
+  /* Interval bounds are on a logarithmic scale, i.e. interval n has
+     lower bound 2^(n/4) - 1.  Use the exponent of (|x|+1)^4 to obtain
+     the interval index.  */
+  v_f64_t xp1 = a + v_f64 (1.0);
+  xp1 = xp1 * xp1;
+  xp1 = xp1 * xp1;
+  v_u64_t ixp1 = v_as_u64_f64 (xp1);
+  i = (ixp1 >> 52) - v_u64 (1023);
+
+  /* Index cannot exceed number of polynomials.  */
+#ifdef SCALAR
+  i = i <= (ERFC_NUM_INTERVALS) ? i : ERFC_NUM_INTERVALS;
+#else
+  i = (v_u64_t){i[0] <= ERFC_NUM_INTERVALS ? i[0] : ERFC_NUM_INTERVALS,
+		i[1] <= ERFC_NUM_INTERVALS ? i[1] : ERFC_NUM_INTERVALS};
+#endif
+  /* Get coeffs of i-th polynomial.  */
+  dat = lookup (i);
+
+  /* Evaluate Polynomial: P(|x|-x_i).  */
+  z = a - dat.xi;
+#define C(i) dat.P[i]
+  p = HORNER_12 (z, C);
+
+  /* Evaluate Gaussian: exp(-x^2).  */
+  v_f64_t e = v_eval_gauss (a);
+
+  /* Copy sign.  */
+  sign = v_as_u64_f64 (x) & ~AbsMask;
+  p = v_as_f64_u64 (v_as_u64_f64 (p) ^ sign);
+
+  /* Assemble result as 2.0 - p * e if x < 0, p * e otherwise.  */
+  y = v_fma_f64 (p, e, fac);
+
+  /* No need to fix value of y if x is out of bound, as
+     P[ERFC_NUM_INTERVALS]=0.  */
+  if (unlikely (v_any_u64 (cmp)))
+    return specialcase (x, y, cmp);
+  return y;
+}
+VPCS_ALIAS
+
+/* Signature and test registration for the vector erfc routine: one
+   PL_TEST_ULP threshold followed by the input ranges handed to
+   PL_TEST_INTERVAL.  NOTE(review): the macros come from pl_sig.h and
+   pl_test.h, which are not shown here -- confirm argument meaning there.  */
+PL_SIG (V, D, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (V_NAME (erfc), 3.15)
+PL_TEST_INTERVAL (V_NAME (erfc), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (V_NAME (erfc), 0x1p-1022, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erfc), -0x1p-1022, -0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erfc), 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (V_NAME (erfc), -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erfc), 0, inf, 40000)
+#endif
diff --git a/pl/math/v_erfc_data.c b/pl/math/v_erfc_data.c
new file mode 100644
index 0000000..3c47033
--- /dev/null
+++ b/pl/math/v_erfc_data.c
@@ -0,0 +1,96 @@
+/*
+ * Polynomial coefficients for double-precision erfc(x) vector function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Coefficients for 20 order-12 polynomials used in v_erfc. The intervals have
+ the same bounds as the scalar algorithm, with the exception of the lower
+ bound of the first interval which is larger. This is because the vector
+ variants fall back to the scalar for tiny arguments, meaning that we can use
+ a slightly different approach which is more precise for larger inputs but
+ unacceptably imprecise for tiny inputs. */
+
+const struct v_erfc_data __v_erfc_data = {
+
+/* Bounds for 20 intervals spanning [0x1p-28, 31]. Interval bounds are on a
+ logarithmic scale, i.e. interval n has lower bound 2^(n/4) - 1, with the
+ exception of the first interval. */
+.interval_bounds = {
+ 0x1p-28, /* If xmin=2^-28, 0 otherwise. */
+ 0x1.837f0518db8a9p-3, /* 0.189. */
+ 0x1.a827999fcef32p-2, /* 0.414. */
+ 0x1.5d13f32b5a75bp-1, /* 0.682. */
+ 0x1.0p0, /* 1.000. */
+ 0x1.60dfc14636e2ap0, /* 1.378. */
+ 0x1.d413cccfe779ap0, /* 1.828. */
+ 0x1.2e89f995ad3adp1, /* 2.364. */
+ 0x1.8p1, /* 3.000. */
+ 0x1.e0dfc14636e2ap1, /* 3.757. */
+ 0x1.2a09e667f3bcdp2, /* 4.657. */
+ 0x1.6e89f995ad3adp2, /* 5.727. */
+ 0x1.cp2, /* 7.000. */
+ 0x1.106fe0a31b715p3, /* 8.514. */
+ 0x1.4a09e667f3bcdp3, /* 10.31. */
+ 0x1.8e89f995ad3adp3, /* 12.45. */
+ 0x1.ep3, /* 15.00. */
+ 0x1.206fe0a31b715p4, /* 18.03. */
+ 0x1.5a09e667f3bcdp4, /* 21.63. */
+ 0x1.9e89f995ad3adp4, /* 25.91. */
+ 0x1.fp4 /* 31.00. */
+},
+
+/* Generated using fpminimax algorithm on each interval separately. The
+ polynomial approximates erfc(x + a) * exp((x + a) ^ 2) in the interval
+ [0;b-a], where [a;b] is the interval in which the input lies. Note this is
+ slightly different from the scalar polynomial, which approximates
+ erfc(x + a) * exp(x ^ 2). See v_erfc.sollya for more details. */
+.poly = {
+/* 3.725290298461914e-9 < x < 0.18920711500272103. */
+{0x1.ffffffdbe4516p-1, -0x1.20dd74e429b54p0, 0x1.ffffffb7c6a67p-1, -0x1.8127466fa2ec9p-1, 0x1.ffffff6eeff5ap-2, -0x1.341f668c90dccp-2, 0x1.5554aca74e5d6p-3, -0x1.6014d9d3fed0dp-4, 0x1.546b5f2c85127p-5, -0x1.2f7ec79acc129p-6, 0x1.a27e53703b7abp-8, 0x1.7b18bce311fa3p-12, -0x1.1897cda04df3ap-9},
+/* 0.18920711500272103 < x < 0.41421356237309515. */
+{0x1.a2b43de077724p-1, -0x1.a3495bb58664cp-1, 0x1.535f3ff4547e6p-1, -0x1.d96eea2951a7cp-2, 0x1.269566a956371p-2, -0x1.4e281de026b47p-3, 0x1.5ea071b652a2fp-4, -0x1.57f46cfca7024p-5, 0x1.3db28243f06abp-6, -0x1.138745eef6f26p-7, 0x1.a9cd70bad344p-9, -0x1.c6e4fda8920c4p-11, 0x1.624709ca2bc71p-16},
+/* 0.41421356237309515 < x < 0.681792830507429. */
+{0x1.532e75764e513p-1, -0x1.28be34f327f9dp-1, 0x1.b088738cca84cp-2, -0x1.14377551bd5c8p-2, 0x1.3e1ecedd64246p-3, -0x1.5087f3110eb57p-4, 0x1.4b3c61efcb562p-5, -0x1.324cc70a4f459p-6, 0x1.0cd19a96af21bp-7, -0x1.cc2ccc725d07p-9, 0x1.a3ba67a7d02b4p-10, -0x1.b1943295882abp-11, 0x1.53a1c5fdf8e67p-12},
+/* 0.681792830507429 < x < 1. */
+{0x1.10f974588f63dp-1, -0x1.9b032139e3367p-2, 0x1.09b942b8a951dp-2, -0x1.327553909cb88p-3, 0x1.42819b6c9a14p-4, -0x1.3a6d6f1924825p-5, 0x1.1f1864dd6f28fp-6, -0x1.ef12c5e9f3232p-8, 0x1.962ac63d55aa1p-9, -0x1.4146d9206419cp-10, 0x1.f823f62268229p-12, -0x1.837ab488d5ed8p-13, 0x1.aa021ae16edfep-15},
+/* 1 < x < 1.378414230005442. */
+{0x1.b5d8780f956b2p-2, -0x1.17c4e3f17c034p-2, 0x1.3c27283c31939p-3, -0x1.44837f88a0ecdp-4, 0x1.33cad0dc779c8p-5, -0x1.10fcef8294e8dp-6, 0x1.c8cb3e5a6a5a6p-8, -0x1.6aedbd3a05f1cp-9, 0x1.1325c0bf9a0cap-10, -0x1.8e28d61a0f646p-12, 0x1.0d554e2ab3652p-13, -0x1.35b5f9ac296ebp-15, 0x1.b8faf07e2527dp-18},
+/* 1.378414230005442 < x < 1.8284271247461903. */
+{0x1.5ee444130b7dbp-2, -0x1.78396ab2083e8p-3, 0x1.6e617ec5bc039p-4, -0x1.49e60f6238765p-5, 0x1.16064fb4428c9p-6, -0x1.ba80a8575a434p-8, 0x1.4ec30f2efeb8p-9, -0x1.e40456c735f09p-11, 0x1.4f7ee6b7885b7p-12, -0x1.bc9997995fdecp-14, 0x1.1169f7327ff2p-15, -0x1.174826d000852p-17, 0x1.5506a7433e925p-20},
+/* 1.8284271247461903 < x < 2.363585661014858. */
+{0x1.19a22c064d4eap-2, -0x1.f645498cae1b3p-4, 0x1.a0565950e1256p-5, -0x1.446605c186f6dp-6, 0x1.df1231b47ff04p-8, -0x1.515164d13dfafp-9, 0x1.c72bde869ad61p-11, -0x1.2768fbf9b1d6ep-12, 0x1.71bd3a1b851e9p-14, -0x1.bca5b5942017cp-16, 0x1.f2d480b3a2e63p-18, -0x1.d339662d53467p-20, 0x1.06d67ebf792bp-22},
+/* 2.363585661014858 < x < 3. */
+{0x1.c57f0542a7637p-3, -0x1.4e5535c17af25p-4, 0x1.d31272523acfep-6, -0x1.3727cbbfd1bfcp-7, 0x1.8d6730b8c5a4cp-9, -0x1.e88548286036fp-11, 0x1.21f6e89456853p-12, -0x1.4d4b7787bd3c2p-14, 0x1.735dc84e7ff16p-16, -0x1.8eb02db832048p-18, 0x1.8dfb8add3b86ep-20, -0x1.47a340d76c72bp-22, 0x1.3e5925ffebe6bp-25},
+/* 3 < x < 3.756828460010884. */
+{0x1.6e9827d229d2dp-3, -0x1.bd6ae4d14b1adp-5, 0x1.043fe1a98c3b9p-6, -0x1.259061ba34453p-8, 0x1.409cc2cc96bedp-10, -0x1.53dec3fd6c443p-12, 0x1.5e72f7baf3554p-14, -0x1.601aa94bf21eep-16, 0x1.58e730ceaa91dp-18, -0x1.4762cbd256163p-20, 0x1.22b8bea5d4a5ap-22, -0x1.ac197af37fcadp-25, 0x1.74cdf138a0b73p-28},
+/* 3.756828460010884 < x < 4.656854249492381. */
+{0x1.29a8a4e95063ep-3, -0x1.29a8a316d331dp-5, 0x1.21876b3fe50cfp-7, -0x1.1276f2d8eefd9p-9, 0x1.fbff521741e5cp-12, -0x1.cb9ce996b9601p-14, 0x1.971075371ef81p-16, -0x1.61458571e4738p-18, 0x1.2c51c21b7ab9ep-20, -0x1.f01e444a666c3p-23, 0x1.7e8f2979b67f1p-25, -0x1.e505367843027p-28, 0x1.67809d68de49cp-31},
+/* 4.656854249492381 < x < 5.727171322029716. */
+{0x1.e583024e2bc7fp-4, -0x1.8fb458acb5acep-6, 0x1.42b9dffac075cp-8, -0x1.ff9fe9a48522p-11, 0x1.8e7e866f4f073p-13, -0x1.313aeee1c2d45p-15, 0x1.cc299efd7374cp-18, -0x1.5587e53442d66p-20, 0x1.f2aca160f159bp-23, -0x1.62ae4834dcda7p-25, 0x1.d6b070147cb37p-28, -0x1.fee399e7be1bfp-31, 0x1.41d6f9fbc9515p-34},
+/* 5.727171322029716 < x < 7. */
+{0x1.8d9cbafa30408p-4, -0x1.0dd14614ed1cfp-6, 0x1.6943976ea6bf4p-9, -0x1.dd6f05f3b914cp-12, 0x1.37891317e7bcfp-14, -0x1.91a81ce9014a2p-17, 0x1.ffcac303208b9p-20, -0x1.424f1af78feb3p-22, 0x1.90b8edbca12a5p-25, -0x1.e69bea0338c7fp-28, 0x1.13b974a710373p-30, -0x1.fdc9aa9359794p-34, 0x1.105fc772b5a66p-37},
+/* 7 < x < 8.513656920021768. */
+{0x1.46dc6bf900f68p-4, -0x1.6e4b45246f95p-7, 0x1.96a3de47d4bd7p-10, -0x1.bf5070eccb409p-13, 0x1.e7af6e83607a2p-16, -0x1.078bf5306f9eep-18, 0x1.1a6e8327243adp-21, -0x1.2c1e7368c7809p-24, 0x1.3bc83557dac43p-27, -0x1.45a6405b2e649p-30, 0x1.3aac4888689ebp-33, -0x1.f1fa23448a168p-37, 0x1.c868668755778p-41},
+/* 8.513656920021768 < x < 10.313708498984761. */
+{0x1.0d9a17e032288p-4, -0x1.f3e942ff4df7p-8, 0x1.cc77f09dabc5cp-11, -0x1.a56e8bfd32da8p-14, 0x1.7f49e31164409p-17, -0x1.5a73f46a6afc9p-20, 0x1.374240ce973d2p-23, -0x1.15e8d473b728cp-26, 0x1.ec3ec79699378p-30, -0x1.ab3b8aba63362p-33, 0x1.5a1381cfe2866p-36, -0x1.c78e252ce77ccp-40, 0x1.589857ceaaaeep-44},
+/* 10.313708498984761 < x < 12.454342644059432. */
+{0x1.be0c73cc19eddp-5, -0x1.56ce6f6c0cbb1p-8, 0x1.0645980ecbbfcp-11, -0x1.8f86f887f6598p-15, 0x1.2ef80cd9e00b1p-18, -0x1.c97ffd66720e4p-22, 0x1.57f0eeecf030ap-25, -0x1.016df7d5e28d9p-28, 0x1.7f0d022922f1dp-32, -0x1.1849731f004aep-35, 0x1.8149e7ca0fb3cp-39, -0x1.b1fe4abe62d81p-43, 0x1.1ae4d60247651p-47},
+/* 12.454342644059432 < x < 15. */
+{0x1.71eafbd9f5877p-5, -0x1.d83714d90461fp-9, 0x1.2c74dbacd45fdp-12, -0x1.7d27f3cfe160ep-16, 0x1.e20b13b8d32e3p-20, -0x1.2fe33cb2bce33p-23, 0x1.7dfd564d69a07p-27, -0x1.dea62ef0f7d7ep-31, 0x1.2a7b946273ea5p-34, -0x1.6eb665bad5b72p-38, 0x1.a8191750e8bf9p-42, -0x1.92d8a86cbd0fcp-46, 0x1.bba272feef841p-51},
+/* 15 < x < 18.027313840043536. */
+{0x1.33714a024097ep-5, -0x1.467f441a50bc3p-9, 0x1.59fa2994c6f7ap-13, -0x1.6dd369d642b7dp-17, 0x1.81fb2aaf2e37p-21, -0x1.966040990b623p-25, 0x1.aaee55e15a079p-29, -0x1.bf756fc8ef04p-33, 0x1.d2daf554e0157p-37, -0x1.dec63e10d317p-41, 0x1.cae915bab7704p-45, -0x1.6537fbb62a8edp-49, 0x1.3f14bd5531da8p-54},
+/* 18.027313840043536 < x < 21.627416997969522. */
+{0x1.fff97acd75487p-6, -0x1.c502e8e46eb81p-10, 0x1.903b065062756p-14, -0x1.6110aa5e81885p-18, 0x1.36fd4c13c4f1fp-22, -0x1.11848650be987p-26, 0x1.e06596bf6a27p-31, -0x1.a527876771d55p-35, 0x1.6fe1b92a40eb8p-39, -0x1.3c6eb50b23bc6p-43, 0x1.fead2230125dp-48, -0x1.5073427c5207dp-52, 0x1.ff420973fa51dp-58},
+/* 21.627416997969522 < x < 25.908685288118864. */
+{0x1.aaf347fc8c45bp-6, -0x1.3b2fd709cf8e5p-10, 0x1.d0ddfb858b60ap-15, -0x1.5673f4a8bb08ep-19, 0x1.f80488e89ddb9p-24, -0x1.728391905fcf3p-28, 0x1.101538d7e30bap-32, -0x1.8f16f49d0fa3bp-37, 0x1.23bbaea534034p-41, -0x1.a40119533ee1p-46, 0x1.1b75770e435fdp-50, -0x1.3804bdeb33efdp-55, 0x1.8ba4e7838a4dp-61},
+/* 25.908685288118864 < x < 31. */
+{0x1.64839d636f92bp-6, -0x1.b7adf753623afp-11, 0x1.0eec0b635a0c4p-15, -0x1.4da09b802ef48p-20, 0x1.9a8b149f5ddf1p-25, -0x1.f8d1f722c65bap-30, 0x1.36247d9a20e19p-34, -0x1.7cbd25180c1d3p-39, 0x1.d243c7a5c8331p-44, -0x1.19e00cc6b1e08p-48, 0x1.418cb6823f2d9p-53, -0x1.2dfdc526c43acp-58, 0x1.49885a987486fp-64},
+/* Dummy interval for x>31 */
+{0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0, 0x0p0,
+ 0x0p0, 0x0p0, 0x0p0}
+}
+};
diff --git a/pl/math/v_erfcf_1u.c b/pl/math/v_erfcf_1u.c
new file mode 100644
index 0000000..963490d
--- /dev/null
+++ b/pl/math/v_erfcf_1u.c
@@ -0,0 +1,183 @@
+/*
+ * Single-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "erfcf.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define P(ia12) __erfcf_poly_data.poly[interval_index (ia12)]
+
+VPCS_ATTR v_f64_t V_NAME (exp_tail) (v_f64_t, v_f64_t);
+
+/* Scalar fallback for special lanes: v_call_f32 is given the scalar erfcf,
+   the input x, the vector result y and the mask of special lanes.  */
+static VPCS_ATTR NOINLINE v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t special)
+{
+  v_f32_t fixed = v_call_f32 (erfcf, x, y, special);
+  return fixed;
+}
+
+/* Map ia12 to one of four polynomial intervals: 0 below 0x400, 1 below
+   0x408, 2 below 0x410 and 3 otherwise.  */
+static inline uint32_t
+interval_index (uint32_t ia12)
+{
+  if (ia12 < 0x400)
+    return 0;
+  if (ia12 < 0x408)
+    return 1;
+  if (ia12 < 0x410)
+    return 2;
+  return 3;
+}
+
+/* The C macro wraps the coeffs argument in order to make the
+ polynomial evaluation more readable. In the scalarised variant the
+ second pointer is ignored. */
+#ifdef SCALAR
+#define C(i) coeff1[i]
+#else
+#define C(i) ((v_f64_t){coeff1[i], coeff2[i]})
+#endif
+
+/* Multiply the Estrin-evaluated polynomial in x (coefficients supplied
+   through the C macro from coeff1/coeff2) by the value exp_tail returns
+   for -(x * x) with a zero tail.  */
+static inline v_f64_t
+v_approx_erfcf_poly_gauss (v_f64_t x, const double *coeff1,
+			   const double *coeff2)
+{
+  /* Powers of x consumed by the Estrin evaluation.  */
+  const v_f64_t sq = x * x;
+  const v_f64_t quad = sq * sq;
+  const v_f64_t oct = quad * quad;
+
+  v_f64_t gauss = V_NAME (exp_tail) (-(x * x), v_f64 (0.0));
+  v_f64_t poly = ESTRIN_15 (x, sq, quad, oct, C);
+  return poly * gauss;
+}
+
+/* Scalar helper: multiply eval_poly (abs_x, coeff) by eval_exp_mx2 (abs_x)
+   and narrow the product to float.  */
+static inline float
+approx_poly_gauss (float abs_x, const double *coeff)
+{
+  float narrowed = (float) (eval_poly (abs_x, coeff) * eval_exp_mx2 (abs_x));
+  return narrowed;
+}
+
+/* Approximate erfc on the lanes selected by `lanes`.  abs_x is |x|, sign is
+   non-zero for lanes whose result must be reflected as 2 - y, and ia12 picks
+   the polynomial through P ().  Lanes that are not selected start from 0.  */
+static v_f32_t
+v_approx_erfcf (v_f32_t abs_x, v_u32_t sign, v_u32_t ia12, v_u32_t lanes)
+{
+#ifdef SCALAR
+  float r = approx_poly_gauss (abs_x, P (ia12));
+  if (sign)
+    return 2 - r;
+  return r;
+#else
+  float32x2_t low_half = {0, 0};
+  float32x2_t high_half = {0, 0};
+
+  /* Polynomial and Gaussian have to be evaluated in double precision to
+     meet the ULP target, so each half of the single-precision input is
+     widened into its own double-precision vector.  Widening and the
+     per-lane coefficient loads cost time, which would be wasted on lanes
+     in the 'boring' zone since those are overwritten later.  A pair of
+     lanes is therefore only widened when both of its lanes are selected;
+     when just one lane of the pair is selected, the scalar helper shared
+     with the scalar variant is used for that lane.  */
+
+  /* Lanes 0 and 1.  */
+  if (lanes[0] & lanes[1])
+    {
+      v_f64_t wide = vcvt_f64_f32 (vget_low_f32 (abs_x));
+      low_half = vcvt_f32_f64 (
+	v_approx_erfcf_poly_gauss (wide, P (ia12[0]), P (ia12[1])));
+    }
+  else if (lanes[0])
+    low_half[0] = approx_poly_gauss (abs_x[0], P (ia12[0]));
+  else if (lanes[1])
+    low_half[1] = approx_poly_gauss (abs_x[1], P (ia12[1]));
+
+  /* Lanes 2 and 3.  */
+  if (lanes[2] & lanes[3])
+    {
+      v_f64_t wide = vcvt_high_f64_f32 (abs_x);
+      high_half = vcvt_f32_f64 (
+	v_approx_erfcf_poly_gauss (wide, P (ia12[2]), P (ia12[3])));
+    }
+  else if (lanes[2])
+    high_half[0] = approx_poly_gauss (abs_x[2], P (ia12[2]));
+  else if (lanes[3])
+    high_half[1] = approx_poly_gauss (abs_x[3], P (ia12[3]));
+
+  v_f32_t res = vcombine_f32 (low_half, high_half);
+
+  /* Only reflect when at least one lane has its sign flag set.  */
+  if (v_any_u32 (sign))
+    res = vbslq_f32 (vceqzq_u32 (sign), res, 2 - res);
+
+  return res;
+#endif
+}
+
+/* Optimized single-precision vector complementary error function
+ erfcf. Max measured error: 0.750092 at various values between
+ -0x1.06521p-20 and -0x1.add1dap-17. For example:
+ __v_erfc(-0x1.08185p-18) got 0x1.00004cp+0 want 0x1.00004ap+0
+ +0.249908 ulp err 0.250092. */
+VPCS_ATTR
+v_f32_t V_NAME (erfcf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t ia = ix & 0x7fffffff;
+ v_u32_t ia12 = ia >> 20;
+ v_u32_t sign = ix >> 31;
+ v_u32_t inf_ia12 = v_u32 (0x7f8);
+
+ v_u32_t special_cases
+ = v_cond_u32 ((ia12 - 0x328) >= ((inf_ia12 & 0x7f8) - 0x328));
+ v_u32_t in_bounds
+ = v_cond_u32 ((ia < 0x408ccccd) | (~sign & (ix < 0x4120f5c3)));
+ v_f32_t boring_zone = v_as_f32_u32 (sign << 30);
+
+#ifdef SCALAR
+ if (unlikely (special_cases))
+ {
+ if (ia12 >= 0x7f8)
+ return (float) (sign << 1) + 1.0f / x; /* Special cases. */
+ else
+ return 1.0f - x; /* Small case. */
+ }
+ else if (likely (!in_bounds))
+ {
+ return sign ? boring_zone : __math_uflowf (boring_zone);
+ }
+#endif
+
+ v_f32_t y = v_approx_erfcf (v_as_f32_u32 (ia), sign, ia12,
+ in_bounds & ~special_cases);
+
+#ifndef SCALAR
+ y = vbslq_f32 (~in_bounds, boring_zone, y);
+
+ if (unlikely (v_any_u32 (special_cases)))
+ {
+ return specialcase (x, y, special_cases);
+ }
+#endif
+
+ return y;
+}
+VPCS_ALIAS
+
+/* Signature and test registration for the vector erfcf routine: one
+   PL_TEST_ULP threshold followed by the input ranges handed to
+   PL_TEST_INTERVAL.  NOTE(review): the macros come from pl_sig.h and
+   pl_test.h, which are not shown here -- confirm argument meaning there.  */
+PL_SIG (V, F, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (V_NAME (erfcf), 0.26)
+PL_TEST_INTERVAL (V_NAME (erfcf), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (V_NAME (erfcf), 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erfcf), -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erfcf), 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (V_NAME (erfcf), -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erfcf), 0, inf, 40000)
+#endif
diff --git a/pl/math/v_erff_1u5.c b/pl/math/v_erff_1u5.c
new file mode 100644
index 0000000..3a25cc8
--- /dev/null
+++ b/pl/math/v_erff_1u5.c
@@ -0,0 +1,116 @@
+/*
+ * Single-precision vector erf(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+VPCS_ATTR v_f32_t V_NAME (expf) (v_f32_t);
+
+#define AbsMask v_u32 (0x7fffffff)
+
+/* Special cases (fall back to scalar calls). */
+VPCS_ATTR
+NOINLINE static v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+ return v_call_f32 (erff, x, y, cmp);
+}
+
+/* A structure to perform look-up in coeffs and other parameter tables. */
+struct entry
+{
+ v_f32_t P[V_ERFF_NCOEFFS];
+};
+
+static inline struct entry
+lookup (v_u32_t i)
+{
+ struct entry e;
+#ifdef SCALAR
+ for (int j = 0; j < V_ERFF_NCOEFFS; ++j)
+ e.P[j] = __v_erff_data.coeffs[j][i];
+#else
+ for (int j = 0; j < V_ERFF_NCOEFFS; ++j)
+ {
+ e.P[j][0] = __v_erff_data.coeffs[j][i[0]];
+ e.P[j][1] = __v_erff_data.coeffs[j][i[1]];
+ e.P[j][2] = __v_erff_data.coeffs[j][i[2]];
+ e.P[j][3] = __v_erff_data.coeffs[j][i[3]];
+ }
+#endif
+ return e;
+}
+
+/* Optimized single precision vector error function erf.
+ Maximum measured at +/- 0.931, 1.25ULP:
+ v_erff(-0x1.dc59fap-1) got -0x1.9f9c88p-1
+ want -0x1.9f9c8ap-1. */
+VPCS_ATTR
+v_f32_t V_NAME (erff) (v_f32_t x)
+{
+ /* Handle both inf/nan as well as small values (|x|<2^-28). If the condition
+ holds in any lane then a loop over scalar calls will be performed. */
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t atop = (ix >> 16) & v_u32 (0x7fff);
+ v_u32_t cmp = v_cond_u32 (atop - v_u32 (0x3180) >= v_u32 (0x7ff0 - 0x3180));
+
+ /* Get sign and absolute value. */
+ v_u32_t sign = ix & ~AbsMask;
+ /* |x| < 0.921875. */
+ v_u32_t red = v_calt_f32 (x, v_f32 (0.921875f));
+ /* |x| > 4.0. */
+ v_u32_t bor = v_cagt_f32 (x, v_f32 (4.0f));
+ /* Avoid dependency in abs(x) in division (and comparison). */
+ v_u32_t i = v_sel_u32 (red, v_u32 (0), v_u32 (1));
+
+ /* Get polynomial coefficients. */
+ struct entry dat = lookup (i);
+
+ v_f32_t a = v_abs_f32 (x);
+ v_f32_t z = v_sel_f32 (red, x * x, a);
+
+ /* Evaluate Polynomial of |x| or x^2. */
+ v_f32_t r = dat.P[6];
+ r = v_fma_f32 (z, r, dat.P[5]);
+ r = v_fma_f32 (z, r, dat.P[4]);
+ r = v_fma_f32 (z, r, dat.P[3]);
+ r = v_fma_f32 (z, r, dat.P[2]);
+ r = v_fma_f32 (z, r, dat.P[1]);
+ r = v_sel_f32 (red, r, v_fma_f32 (z, r, dat.P[0]));
+ r = v_fma_f32 (a, r, a);
+
+ /* y = |x| + |x|*P(x^2) if |x| < 0.921875
+ 1 - exp (-(|x|+|x|*P(|x|))) otherwise. */
+ v_f32_t y = v_sel_f32 (red, r, v_f32 (1.0f) - V_NAME (expf) (-r));
+
+ /* Boring domain (absolute value is required to get the sign of erf(-nan)
+ right). */
+ y = v_sel_f32 (bor, v_f32 (1.0f), v_abs_f32 (y));
+
+ /* y=erf(x) if x>0, -erf(-x) otherwise. */
+ y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign);
+
+ if (unlikely (v_any_u32 (cmp)))
+ return specialcase (x, y, cmp);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (V_NAME (erff), 0.76)
+PL_TEST_INTERVAL (V_NAME (erff), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (V_NAME (erff), 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erff), -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME (erff), 0x1p-26, 0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erff), -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (V_NAME (erff), 0, inf, 40000)
+#endif
diff --git a/pl/math/v_erff_data.c b/pl/math/v_erff_data.c
new file mode 100644
index 0000000..73ccb5c
--- /dev/null
+++ b/pl/math/v_erff_data.c
@@ -0,0 +1,18 @@
+/*
+ * Data for approximation of vector erff.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Minimax approximation of erff; [j][0]: |x| < 0.921875, [j][1]: otherwise. */
+const struct v_erff_data __v_erff_data
+ = {.coeffs = {{0x0p0f, 0x1.079d0cp-3f},
+ {0x1.06eba6p-03f, 0x1.450aa0p-1f},
+ {-0x1.8126e0p-02f, 0x1.b55cb0p-4f},
+ {0x1.ce1a46p-04f, -0x1.8d6300p-6f},
+ {-0x1.b68bd2p-06f, 0x1.fd1336p-9f},
+ {0x1.473f48p-08f, -0x1.91d2ccp-12f},
+ {-0x1.3a1a82p-11f, 0x1.222900p-16f}}};
diff --git a/pl/math/v_exp_tail.c b/pl/math/v_exp_tail.c
new file mode 100644
index 0000000..fd38aa8
--- /dev/null
+++ b/pl/math/v_exp_tail.c
@@ -0,0 +1,75 @@
+/*
+ * Double-precision vector e^(x+tail) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "math_config.h"
+#if V_SUPPORTED
+#include "v_exp_tail.h"
+
+#define C1 v_f64 (C1_scal)
+#define C2 v_f64 (C2_scal)
+#define C3 v_f64 (C3_scal)
+#define InvLn2 v_f64 (InvLn2_scal)
+#define Ln2hi v_f64 (Ln2hi_scal)
+#define Ln2lo v_f64 (Ln2lo_scal)
+
+#define IndexMask v_u64 (IndexMask_scal)
+#define Shift v_f64 (Shift_scal)
+#define Thres v_f64 (Thres_scal)
+
+VPCS_ATTR
+static v_f64_t
+specialcase (v_f64_t s, v_f64_t y, v_f64_t n)
+{
+ v_f64_t absn = v_abs_f64 (n);
+
+ /* 2^(n/N) may overflow, break it up into s1*s2. */
+ v_u64_t b = v_cond_u64 (n <= v_f64 (0.0)) & v_u64 (0x6000000000000000);
+ v_f64_t s1 = v_as_f64_u64 (v_u64 (0x7000000000000000) - b);
+ v_f64_t s2 = v_as_f64_u64 (v_as_u64_f64 (s) - v_u64 (0x3010000000000000) + b);
+ v_u64_t cmp = v_cond_u64 (absn > v_f64 (1280.0 * N));
+ v_f64_t r1 = s1 * s1;
+ v_f64_t r0 = v_fma_f64 (y, s2, s2) * s1;
+ return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0)));
+}
+
+VPCS_ATTR
+v_f64_t V_NAME (exp_tail) (v_f64_t x, v_f64_t xtail)
+{
+ v_f64_t n, r, s, y, z;
+ v_u64_t cmp, u, e, i;
+
+ cmp = v_cond_u64 (v_abs_f64 (x) > Thres);
+
+ /* n = round(x/(ln2/N)). */
+ z = v_fma_f64 (x, InvLn2, Shift);
+ u = v_as_u64_f64 (z);
+ n = z - Shift;
+
+ /* r = x - n*ln2/N. */
+ r = x;
+ r = v_fma_f64 (-Ln2hi, n, r);
+ r = v_fma_f64 (-Ln2lo, n, r);
+
+ e = u << (52 - V_EXP_TAIL_TABLE_BITS);
+ i = u & IndexMask;
+
+ /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4. */
+ y = v_fma_f64 (C3, r, C2);
+ y = v_fma_f64 (y, r, C1);
+ y = v_fma_f64 (y, r, v_f64 (1.0));
+ y = v_fma_f64 (y, r, xtail);
+
+ /* s = 2^(n/N). */
+ u = v_lookup_u64 (Tab, i);
+ s = v_as_f64_u64 (u + e);
+
+ if (unlikely (v_any_u64 (cmp)))
+ return specialcase (s, y, n);
+ return v_fma_f64 (y, s, s);
+}
+#endif
diff --git a/pl/math/v_exp_tail.h b/pl/math/v_exp_tail.h
new file mode 100644
index 0000000..903f1fd
--- /dev/null
+++ b/pl/math/v_exp_tail.h
@@ -0,0 +1,21 @@
+/*
+ * Constants for double-precision e^(x+tail) vector function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define C1_scal 0x1.fffffffffffd4p-2
+#define C2_scal 0x1.5555571d6b68cp-3
+#define C3_scal 0x1.5555576a59599p-5
+#define InvLn2_scal 0x1.71547652b82fep8 /* N/ln2. */
+#define Ln2hi_scal 0x1.62e42fefa39efp-9 /* ln2/N. */
+#define Ln2lo_scal 0x1.abc9e3b39803f3p-64
+
+#define N (1 << V_EXP_TAIL_TABLE_BITS)
+#define Tab __v_exp_tail_data
+#define IndexMask_scal (N - 1)
+#define Shift_scal 0x1.8p+52
+#define Thres_scal 704.0
diff --git a/pl/math/v_exp_tail_data.c b/pl/math/v_exp_tail_data.c
new file mode 100644
index 0000000..675eb76
--- /dev/null
+++ b/pl/math/v_exp_tail_data.c
@@ -0,0 +1,97 @@
+/*
+ * Lookup table for double-precision e^(x+tail) vector function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* 2^(j/N), j=0..N-1 (where N = 256). */
+const uint64_t __v_exp_tail_data[]
+ = {0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335,
+ 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc,
+ 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574,
+ 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836,
+ 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383,
+ 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85,
+ 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2,
+ 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e,
+ 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc,
+ 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e,
+ 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b,
+ 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f,
+ 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4,
+ 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027,
+ 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6,
+ 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1,
+ 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f,
+ 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29,
+ 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1,
+ 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f,
+ 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56,
+ 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd,
+ 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff,
+ 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b,
+ 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866,
+ 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4,
+ 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422,
+ 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024,
+ 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897,
+ 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232,
+ 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0,
+ 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7,
+ 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d,
+ 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee,
+ 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82,
+ 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2,
+ 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd,
+ 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03,
+ 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148,
+ 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4,
+ 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320,
+ 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6,
+ 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd,
+ 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645,
+ 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484,
+ 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a,
+ 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9,
+ 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6,
+ 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132,
+ 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491,
+ 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13,
+ 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21,
+ 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699,
+ 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778,
+ 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736,
+ 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2,
+ 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f,
+ 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2,
+ 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090,
+ 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e,
+ 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33,
+ 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052,
+ 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf,
+ 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774,
+ 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666,
+ 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1,
+ 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47,
+ 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f,
+ 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09,
+ 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c,
+ 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b,
+ 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db,
+ 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa,
+ 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968,
+ 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487,
+ 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075,
+ 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460,
+ 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17,
+ 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6,
+ 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740,
+ 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1,
+ 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a,
+ 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540,
+ 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89,
+ 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1,
+ 0x3feff9d96b2a23d9};
diff --git a/pl/math/v_expf.c b/pl/math/v_expf.c
new file mode 100644
index 0000000..a422e69
--- /dev/null
+++ b/pl/math/v_expf.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#if V_SUPPORTED
+
+static const float Poly[] = {
+ /* maxerr: 1.45358 +0.5 ulp. */
+ 0x1.0e4020p-7f,
+ 0x1.573e2ep-5f,
+ 0x1.555e66p-3f,
+ 0x1.fffdb6p-2f,
+ 0x1.ffffecp-1f,
+};
+#define C0 v_f32 (Poly[0])
+#define C1 v_f32 (Poly[1])
+#define C2 v_f32 (Poly[2])
+#define C3 v_f32 (Poly[3])
+#define C4 v_f32 (Poly[4])
+
+#define Shift v_f32 (0x1.8p23f)
+#define InvLn2 v_f32 (0x1.715476p+0f)
+#define Ln2hi v_f32 (0x1.62e4p-1f)
+#define Ln2lo v_f32 (0x1.7f7d1cp-20f)
+
+VPCS_ATTR
+static v_f32_t
+specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale)
+{
+ /* 2^n may overflow, break it up into s1*s2. */
+ v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000);
+ v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b);
+ v_f32_t s2 = v_as_f32_u32 (e - b);
+ v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f));
+ v_u32_t r2 = v_as_u32_f32 (s1 * s1);
+ v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1);
+ /* Similar to r1 but avoids double rounding in the subnormal range. */
+ v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale));
+ return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0));
+}
+
+VPCS_ATTR
+v_f32_t
+V_NAME(expf) (v_f32_t x)
+{
+ v_f32_t n, r, r2, scale, p, q, poly, absn, z;
+ v_u32_t cmp, e;
+
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+#if 1
+ z = v_fma_f32 (x, InvLn2, Shift);
+ n = z - Shift;
+ r = v_fma_f32 (n, -Ln2hi, x);
+ r = v_fma_f32 (n, -Ln2lo, r);
+ e = v_as_u32_f32 (z) << 23;
+#else
+ z = x * InvLn2;
+ n = v_round_f32 (z);
+ r = v_fma_f32 (n, -Ln2hi, x);
+ r = v_fma_f32 (n, -Ln2lo, r);
+ e = v_as_u32_s32 (v_round_s32 (z)) << 23;
+#endif
+ scale = v_as_f32_u32 (e + v_u32 (0x3f800000));
+ absn = v_abs_f32 (n);
+ cmp = v_cond_u32 (absn > v_f32 (126.0f));
+ r2 = r * r;
+ p = v_fma_f32 (C0, r, C1);
+ q = v_fma_f32 (C2, r, C3);
+ q = v_fma_f32 (p, r2, q);
+ p = C4 * r;
+ poly = v_fma_f32 (q, r2, p);
+ if (unlikely (v_any_u32 (cmp)))
+ return specialcase (poly, n, e, absn, cmp, scale);
+ return v_fma_f32 (poly, scale, scale);
+}
+VPCS_ALIAS
+#endif
diff --git a/pl/math/v_expm1_2u5.c b/pl/math/v_expm1_2u5.c
new file mode 100644
index 0000000..4b491d1
--- /dev/null
+++ b/pl/math/v_expm1_2u5.c
@@ -0,0 +1,113 @@
+/*
+ * Double-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define InvLn2 v_f64 (0x1.71547652b82fep0)
+#define MLn2hi v_f64 (-0x1.62e42fefa39efp-1)
+#define MLn2lo v_f64 (-0x1.abc9e3b39803fp-56)
+#define Shift v_f64 (0x1.8p52)
+#define TinyBound \
+ 0x3cc0000000000000 /* 0x1p-51, below which expm1(x) is within 2 ULP of x. */
+#define SpecialBound \
+ 0x40862b7d369a5aa9 /* 0x1.62b7d369a5aa9p+9. For |x| > SpecialBound, the \
+ final stage of the algorithm overflows so fall back to \
+ scalar. */
+#define AbsMask 0x7fffffffffffffff
+#define One 0x3ff0000000000000
+
+#define C(i) v_f64 (__expm1_poly[i])
+
+static inline v_f64_t
+eval_poly (v_f64_t f, v_f64_t f2)
+{
+ /* Evaluate custom polynomial using Estrin scheme. */
+ v_f64_t p_01 = v_fma_f64 (f, C (1), C (0));
+ v_f64_t p_23 = v_fma_f64 (f, C (3), C (2));
+ v_f64_t p_45 = v_fma_f64 (f, C (5), C (4));
+ v_f64_t p_67 = v_fma_f64 (f, C (7), C (6));
+ v_f64_t p_89 = v_fma_f64 (f, C (9), C (8));
+
+ v_f64_t p_03 = v_fma_f64 (f2, p_23, p_01);
+ v_f64_t p_47 = v_fma_f64 (f2, p_67, p_45);
+ v_f64_t p_8a = v_fma_f64 (f2, C (10), p_89);
+
+ v_f64_t f4 = f2 * f2;
+ v_f64_t p_07 = v_fma_f64 (f4, p_47, p_03);
+ return v_fma_f64 (f4 * f4, p_8a, p_07);
+}
+
+/* Double-precision vector exp(x) - 1 function.
+ The maximum observed error is 2.18 ULP:
+ __v_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
+ want 0x1.a8b9ea8d66e2p-2. */
+VPCS_ATTR
+v_f64_t V_NAME (expm1) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t ax = ix & AbsMask;
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to the scalar
+ variant for all lanes if any of them should trigger an exception. */
+ v_u64_t special = v_cond_u64 ((ax >= SpecialBound) | (ax <= TinyBound));
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (expm1, x, x, v_u64 (-1));
+#else
+ /* Large input, NaNs and Infs. */
+ v_u64_t special
+ = v_cond_u64 ((ax >= SpecialBound) | (ix == 0x8000000000000000));
+#endif
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ v_f64_t j = v_fma_f64 (InvLn2, x, Shift) - Shift;
+ v_s64_t i = v_to_s64_f64 (j);
+ v_f64_t f = v_fma_f64 (j, MLn2hi, x);
+ f = v_fma_f64 (j, MLn2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ v_f64_t f2 = f * f;
+ v_f64_t p = v_fma_f64 (f2, eval_poly (f, f2), f);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ v_f64_t t = v_as_f64_u64 (v_as_u64_s64 (i << 52) + One);
+ /* expm1(x) ~= p * t + (t - 1). */
+ v_f64_t y = v_fma_f64 (p, t, t - 1);
+
+#if !WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (expm1, x, y, special);
+#endif
+
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (V_NAME (expm1), 1.68)
+PL_TEST_EXPECT_FENV (V_NAME (expm1), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (expm1), 0, 0x1p-51, 1000)
+PL_TEST_INTERVAL (V_NAME (expm1), -0, -0x1p-51, 1000)
+PL_TEST_INTERVAL (V_NAME (expm1), 0x1p-51, 0x1.63108c75a1937p+9, 100000)
+PL_TEST_INTERVAL (V_NAME (expm1), -0x1p-51, -0x1.740bf7c0d927dp+9, 100000)
+PL_TEST_INTERVAL (V_NAME (expm1), 0x1.63108c75a1937p+9, inf, 100)
+PL_TEST_INTERVAL (V_NAME (expm1), -0x1.740bf7c0d927dp+9, -inf, 100)
+#endif
diff --git a/pl/math/v_expm1f_1u6.c b/pl/math/v_expm1f_1u6.c
new file mode 100644
index 0000000..ab13242
--- /dev/null
+++ b/pl/math/v_expm1f_1u6.c
@@ -0,0 +1,94 @@
+/*
+ * Single-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define Shift v_f32 (0x1.8p23f)
+#define InvLn2 v_f32 (0x1.715476p+0f)
+#define MLn2hi v_f32 (-0x1.62e4p-1f)
+#define MLn2lo v_f32 (-0x1.7f7d1cp-20f)
+#define AbsMask (0x7fffffff)
+#define One (0x3f800000)
+#define SpecialBound \
+ (0x42af5e20) /* asuint(0x1.5ebc4p+6). Largest value of x for which expm1(x) \
+ should round to -1. */
+#define TinyBound (0x34000000) /* asuint(0x1p-23). */
+
+#define C(i) v_f32 (__expm1f_poly[i])
+
+/* Single-precision vector exp(x) - 1 function.
+ The maximum error is 1.51 ULP:
+ expm1f(0x1.8baa96p-2) got 0x1.e2fb9p-2
+ want 0x1.e2fb94p-2. */
+VPCS_ATTR
+v_f32_t V_NAME (expm1f) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t ax = ix & AbsMask;
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to the scalar
+ variant for all lanes if any of them should trigger an exception. */
+ v_u32_t special
+ = v_cond_u32 ((ax >= SpecialBound) | (ix == 0x80000000) | (ax < TinyBound));
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (expm1f, x, x, v_u32 (0xffffffff));
+#else
+ /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf and -0. */
+ v_u32_t special = v_cond_u32 ((ax >= SpecialBound) | (ix == 0x80000000));
+#endif
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ v_f32_t j = v_fma_f32 (InvLn2, x, Shift) - Shift;
+ v_s32_t i = v_to_s32_f32 (j);
+ v_f32_t f = v_fma_f32 (j, MLn2hi, x);
+ f = v_fma_f32 (j, MLn2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+
+ v_f32_t p = v_fma_f32 (C (4), f, C (3));
+ p = v_fma_f32 (p, f, C (2));
+ p = v_fma_f32 (p, f, C (1));
+ p = v_fma_f32 (p, f, C (0));
+ p = v_fma_f32 (f * f, p, f);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ v_f32_t t = v_as_f32_u32 (v_as_u32_s32 (i << 23) + One);
+ /* expm1(x) ~= p * t + (t - 1). */
+ v_f32_t y = v_fma_f32 (p, t, t - 1);
+
+#if !WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (expm1f, x, y, special);
+#endif
+
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (V_NAME (expm1f), 1.02)
+PL_TEST_EXPECT_FENV (V_NAME (expm1f), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (expm1f), 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (V_NAME (expm1f), -0, -0x1p-23, 1000)
+PL_TEST_INTERVAL (V_NAME (expm1f), 0x1p-23, 0x1.644716p6, 1000000)
+PL_TEST_INTERVAL (V_NAME (expm1f), -0x1p-23, -0x1.9bbabcp+6, 1000000)
+#endif
diff --git a/pl/math/v_expm1f_inline.h b/pl/math/v_expm1f_inline.h
new file mode 100644
index 0000000..c261941
--- /dev/null
+++ b/pl/math/v_expm1f_inline.h
@@ -0,0 +1,49 @@
+/*
+ * Helper for single-precision routines which calculate exp(x) - 1 and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_V_EXPM1F_INLINE_H
+#define PL_MATH_V_EXPM1F_INLINE_H
+
+#include "v_math.h"
+#include "math_config.h"
+#include "estrinf.h"
+
+#define One 0x3f800000
+#define Shift v_f32 (0x1.8p23f)
+#define InvLn2 v_f32 (0x1.715476p+0f)
+#define MLn2hi v_f32 (-0x1.62e4p-1f)
+#define MLn2lo v_f32 (-0x1.7f7d1cp-20f)
+
+#define C(i) v_f32 (__expm1f_poly[i])
+
+static inline v_f32_t
+expm1f_inline (v_f32_t x)
+{
+ /* Helper routine for calculating exp(x) - 1.
+ Copied from v_expm1f_1u6.c, with all special-case handling removed - the
+ calling routine should handle special values if required. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ v_f32_t j = v_fma_f32 (InvLn2, x, Shift) - Shift;
+ v_s32_t i = v_to_s32_f32 (j);
+ v_f32_t f = v_fma_f32 (j, MLn2hi, x);
+ f = v_fma_f32 (j, MLn2lo, f);
+
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+ Uses Estrin scheme, where the main __v_expm1f routine uses Horner. */
+ v_f32_t f2 = f * f;
+ v_f32_t p = ESTRIN_4 (f, f2, f2 * f2, C);
+ p = v_fma_f32 (f2, p, f);
+
+ /* t = 2^i. */
+ v_f32_t t = v_as_f32_u32 (v_as_u32_s32 (i << 23) + One);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return v_fma_f32 (p, t, t - 1);
+}
+
+#endif // PL_MATH_V_EXPM1F_INLINE_H
diff --git a/pl/math/v_log10_2u5.c b/pl/math/v_log10_2u5.c
new file mode 100644
index 0000000..86d398c
--- /dev/null
+++ b/pl/math/v_log10_2u5.c
@@ -0,0 +1,110 @@
+/*
+ * Double-precision vector log10(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define A(i) v_f64 (__v_log10_data.poly[i])
+#define T(s, i) __v_log10_data.tab[i].s
+#define Ln2 v_f64 (0x1.62e42fefa39efp-1)
+#define N (1 << V_LOG10_TABLE_BITS)
+#define OFF v_u64 (0x3fe6900900000000)
+
+struct entry
+{
+ v_f64_t invc;
+ v_f64_t log10c;
+};
+
+static inline struct entry
+lookup (v_u64_t i)
+{
+ struct entry e;
+#ifdef SCALAR
+ e.invc = T (invc, i);
+ e.log10c = T (log10c, i);
+#else
+ e.invc[0] = T (invc, i[0]);
+ e.log10c[0] = T (log10c, i[0]);
+ e.invc[1] = T (invc, i[1]);
+ e.log10c[1] = T (log10c, i[1]);
+#endif
+ return e;
+}
+
+VPCS_ATTR
+inline static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+ return v_call_f64 (log10, x, y, cmp);
+}
+
+/* Our implementation of v_log10 is a slight modification of v_log (1.660ulps).
+ Max ULP error: < 2.5 ulp (nearest rounding.)
+ Maximum measured at 2.46 ulp for x in [0.96, 0.97]
+ __v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6
+ want 0x1.fff6be3cae4b9p-6
+ -0.459999 ulp err 1.96. */
+VPCS_ATTR
+v_f64_t V_NAME (log10) (v_f64_t x)
+{
+ v_f64_t z, r, r2, p, y, kd, hi;
+ v_u64_t ix, iz, tmp, top, i, cmp;
+ v_s64_t k;
+ struct entry e;
+
+ ix = v_as_u64_f64 (x);
+ top = ix >> 48;
+ cmp = v_cond_u64 (top - v_u64 (0x0010) >= v_u64 (0x7ff0 - 0x0010));
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - V_LOG10_TABLE_BITS)) % N;
+ k = v_as_s64_u64 (tmp) >> 52; /* arithmetic shift. */
+ iz = ix - (tmp & v_u64 (0xfffULL << 52));
+ z = v_as_f64_u64 (iz);
+ e = lookup (i);
+
+ /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */
+ r = v_fma_f64 (z, e.invc, v_f64 (-1.0));
+ kd = v_to_f64_s64 (k);
+
+ /* hi = r / log(10) + log10(c) + k*log10(2).
+ Constants in `v_log10_data.c` are computed (in extended precision) as
+ e.log10c := e.logc * invln10. */
+ v_f64_t w = v_fma_f64 (r, v_f64 (__v_log10_data.invln10), e.log10c);
+
+ /* hi = w + k * log10(2). */
+ hi = v_fma_f64 (kd, v_f64 (__v_log10_data.log10_2), w);
+
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ r2 = r * r;
+ y = v_fma_f64 (A (3), r, A (2));
+ p = v_fma_f64 (A (1), r, A (0));
+ y = v_fma_f64 (A (4), r2, y);
+ y = v_fma_f64 (y, r2, p);
+ y = v_fma_f64 (y, r2, hi);
+
+ if (unlikely (v_any_u64 (cmp)))
+ return specialcase (x, y, cmp);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (V_NAME (log10), 1.97)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (log10))
+PL_TEST_INTERVAL (V_NAME (log10), 0, 0xffff000000000000, 10000)
+PL_TEST_INTERVAL (V_NAME (log10), 0x1p-4, 0x1p4, 400000)
+PL_TEST_INTERVAL (V_NAME (log10), 0, inf, 400000)
+#endif
diff --git a/pl/math/v_log10_data.c b/pl/math/v_log10_data.c
new file mode 100644
index 0000000..fda85c8
--- /dev/null
+++ b/pl/math/v_log10_data.c
@@ -0,0 +1,167 @@
+/*
+ * Lookup table for double-precision log10(x) vector function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << V_LOG10_TABLE_BITS)
+
+/* Algorithm:
+
+ x = 2^k z
+ log10(x) = k log10(2) + log10(c) + poly(z/c - 1) / log(10)
+
+where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,N=128)
+and log10(c) and 1/c for the ith subinterval come from a lookup table:
+
+ tab[i].invc = 1/c
+ tab[i].log10c = (double)log10(c)
+
+where c is near the center of the subinterval and is chosen by trying several
+floating point invc candidates around 1/center and selecting one for which
+the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval
+that contains 1 and the previous one got tweaked to avoid cancellation.
+NB: invc should be optimized to minimize error in (double)log10(c) instead. */
+const struct v_log10_data __v_log10_data
+ = {.tab = {{0x1.6a133d0dec120p+0, -0x1.345825f221684p-3},
+ {0x1.6815f2f3e42edp+0, -0x1.2f71a1f0c554ep-3},
+ {0x1.661e39be1ac9ep+0, -0x1.2a91fdb30b1f4p-3},
+ {0x1.642bfa30ac371p+0, -0x1.25b9260981a04p-3},
+ {0x1.623f1d916f323p+0, -0x1.20e7081762193p-3},
+ {0x1.60578da220f65p+0, -0x1.1c1b914aeefacp-3},
+ {0x1.5e75349dea571p+0, -0x1.1756af5de404dp-3},
+ {0x1.5c97fd387a75ap+0, -0x1.12985059c90bfp-3},
+ {0x1.5abfd2981f200p+0, -0x1.0de0628f63df4p-3},
+ {0x1.58eca051dc99cp+0, -0x1.092ed492e08eep-3},
+ {0x1.571e526d9df12p+0, -0x1.0483954caf1dfp-3},
+ {0x1.5554d555b3fcbp+0, -0x1.ffbd27a9adbcp-4},
+ {0x1.539015e2a20cdp+0, -0x1.f67f7f2e3d1ap-4},
+ {0x1.51d0014ee0164p+0, -0x1.ed4e1071ceebep-4},
+ {0x1.50148538cd9eep+0, -0x1.e428bb47413c4p-4},
+ {0x1.4e5d8f9f698a1p+0, -0x1.db0f6003028d6p-4},
+ {0x1.4cab0edca66bep+0, -0x1.d201df6749831p-4},
+ {0x1.4afcf1a9db874p+0, -0x1.c9001ac5c9672p-4},
+ {0x1.495327136e16fp+0, -0x1.c009f3c78c79p-4},
+ {0x1.47ad9e84af28fp+0, -0x1.b71f4cb642e53p-4},
+ {0x1.460c47b39ae15p+0, -0x1.ae400818526b2p-4},
+ {0x1.446f12b278001p+0, -0x1.a56c091954f87p-4},
+ {0x1.42d5efdd720ecp+0, -0x1.9ca3332f096eep-4},
+ {0x1.4140cfe001a0fp+0, -0x1.93e56a3f23e55p-4},
+ {0x1.3fafa3b421f69p+0, -0x1.8b3292a3903bp-4},
+ {0x1.3e225c9c8ece5p+0, -0x1.828a9112d9618p-4},
+ {0x1.3c98ec29a211ap+0, -0x1.79ed4ac35f5acp-4},
+ {0x1.3b13442a413fep+0, -0x1.715aa51ed28c4p-4},
+ {0x1.399156baa3c54p+0, -0x1.68d2861c999e9p-4},
+ {0x1.38131639b4cdbp+0, -0x1.6054d40ded21p-4},
+ {0x1.36987540fbf53p+0, -0x1.57e17576bc9a2p-4},
+ {0x1.352166b648f61p+0, -0x1.4f7851798bb0bp-4},
+ {0x1.33adddb3eb575p+0, -0x1.47194f5690ae3p-4},
+ {0x1.323dcd99fc1d3p+0, -0x1.3ec456d58ec47p-4},
+ {0x1.30d129fefc7d2p+0, -0x1.36794ff3e5f55p-4},
+ {0x1.2f67e6b72fe7dp+0, -0x1.2e382315725e4p-4},
+ {0x1.2e01f7cf8b187p+0, -0x1.2600b8ed82e91p-4},
+ {0x1.2c9f518ddc86ep+0, -0x1.1dd2fa85efc12p-4},
+ {0x1.2b3fe86e5f413p+0, -0x1.15aed136e3961p-4},
+ {0x1.29e3b1211b25cp+0, -0x1.0d94269d1a30dp-4},
+ {0x1.288aa08b373cfp+0, -0x1.0582e4a7659f5p-4},
+ {0x1.2734abcaa8467p+0, -0x1.faf5eb655742dp-5},
+ {0x1.25e1c82459b81p+0, -0x1.eaf888487e8eep-5},
+ {0x1.2491eb1ad59c5p+0, -0x1.db0d75ef25a82p-5},
+ {0x1.23450a54048b5p+0, -0x1.cb348a49e6431p-5},
+ {0x1.21fb1bb09e578p+0, -0x1.bb6d9c69acdd8p-5},
+ {0x1.20b415346d8f7p+0, -0x1.abb88368aa7ap-5},
+ {0x1.1f6fed179a1acp+0, -0x1.9c1517476af14p-5},
+ {0x1.1e2e99b93c7b3p+0, -0x1.8c833051bfa4dp-5},
+ {0x1.1cf011a7a882ap+0, -0x1.7d02a78e7fb31p-5},
+ {0x1.1bb44b97dba5ap+0, -0x1.6d93565e97c5fp-5},
+ {0x1.1a7b3e66cdd4fp+0, -0x1.5e351695db0c5p-5},
+ {0x1.1944e11dc56cdp+0, -0x1.4ee7c2ba67adcp-5},
+ {0x1.18112aebb1a6ep+0, -0x1.3fab35ba16c01p-5},
+ {0x1.16e013231b7e9p+0, -0x1.307f4ad854bc9p-5},
+ {0x1.15b1913f156cfp+0, -0x1.2163ddf4f988cp-5},
+ {0x1.14859cdedde13p+0, -0x1.1258cb5d19e22p-5},
+ {0x1.135c2dc68cfa4p+0, -0x1.035defdba3188p-5},
+ {0x1.12353bdb01684p+0, -0x1.e8e651191bce4p-6},
+ {0x1.1110bf25b85b4p+0, -0x1.cb30a62be444cp-6},
+ {0x1.0feeafd2f8577p+0, -0x1.ad9a9b3043823p-6},
+ {0x1.0ecf062c51c3bp+0, -0x1.9023ecda1ccdep-6},
+ {0x1.0db1baa076c8bp+0, -0x1.72cc592bd82dp-6},
+ {0x1.0c96c5bb3048ep+0, -0x1.55939eb1f9c6ep-6},
+ {0x1.0b7e20263e070p+0, -0x1.38797ca6cc5ap-6},
+ {0x1.0a67c2acd0ce3p+0, -0x1.1b7db35c2c072p-6},
+ {0x1.0953a6391e982p+0, -0x1.fd400812ee9a2p-7},
+ {0x1.0841c3caea380p+0, -0x1.c3c05fb4620f1p-7},
+ {0x1.07321489b13eap+0, -0x1.8a7bf3c40e2e3p-7},
+ {0x1.062491aee9904p+0, -0x1.517249c15a75cp-7},
+ {0x1.05193497a7cc5p+0, -0x1.18a2ea5330c91p-7},
+ {0x1.040ff6b5f5e9fp+0, -0x1.c01abc8cdc4e2p-8},
+ {0x1.0308d19aa6127p+0, -0x1.4f6261750dec9p-8},
+ {0x1.0203beedb0c67p+0, -0x1.be37b6612afa7p-9},
+ {0x1.010037d38bcc2p+0, -0x1.bc3a8398ac26p-10},
+ {1.0, 0.0},
+ {0x1.fc06d493cca10p-1, 0x1.bb796219f30a5p-9},
+ {0x1.f81e6ac3b918fp-1, 0x1.b984fdcba61cep-8},
+ {0x1.f44546ef18996p-1, 0x1.49cf12adf8e8cp-7},
+ {0x1.f07b10382c84bp-1, 0x1.b6075b5217083p-7},
+ {0x1.ecbf7070e59d4p-1, 0x1.10b7466fc30ddp-6},
+ {0x1.e91213f715939p-1, 0x1.4603e4db6a3a1p-6},
+ {0x1.e572a9a75f7b7p-1, 0x1.7aeb10e99e105p-6},
+ {0x1.e1e0e2c530207p-1, 0x1.af6e49b0f0e36p-6},
+ {0x1.de5c72d8a8be3p-1, 0x1.e38f064f41179p-6},
+ {0x1.dae50fa5658ccp-1, 0x1.0ba75abbb7623p-5},
+ {0x1.d77a71145a2dap-1, 0x1.25575ee2dba86p-5},
+ {0x1.d41c51166623ep-1, 0x1.3ed83f477f946p-5},
+ {0x1.d0ca6ba0bb29fp-1, 0x1.582aa79af60efp-5},
+ {0x1.cd847e8e59681p-1, 0x1.714f400fa83aep-5},
+ {0x1.ca4a499693e00p-1, 0x1.8a46ad3901cb9p-5},
+ {0x1.c71b8e399e821p-1, 0x1.a311903b6b87p-5},
+ {0x1.c3f80faf19077p-1, 0x1.bbb086f216911p-5},
+ {0x1.c0df92dc2b0ecp-1, 0x1.d4242bdda648ep-5},
+ {0x1.bdd1de3cbb542p-1, 0x1.ec6d167c2af1p-5},
+ {0x1.baceb9e1007a3p-1, 0x1.0245ed8221426p-4},
+ {0x1.b7d5ef543e55ep-1, 0x1.0e40856c74f64p-4},
+ {0x1.b4e749977d953p-1, 0x1.1a269a31120fep-4},
+ {0x1.b20295155478ep-1, 0x1.25f8718fc076cp-4},
+ {0x1.af279f8e82be2p-1, 0x1.31b64ffc95bfp-4},
+ {0x1.ac5638197fdf3p-1, 0x1.3d60787ca5063p-4},
+ {0x1.a98e2f102e087p-1, 0x1.48f72ccd187fdp-4},
+ {0x1.a6cf5606d05c1p-1, 0x1.547aad6602f1cp-4},
+ {0x1.a4197fc04d746p-1, 0x1.5feb3989d3acbp-4},
+ {0x1.a16c80293dc01p-1, 0x1.6b490f3978c79p-4},
+ {0x1.9ec82c4dc5bc9p-1, 0x1.76946b3f5e703p-4},
+ {0x1.9c2c5a491f534p-1, 0x1.81cd895717c83p-4},
+ {0x1.9998e1480b618p-1, 0x1.8cf4a4055c30ep-4},
+ {0x1.970d9977c6c2dp-1, 0x1.9809f4c48c0ebp-4},
+ {0x1.948a5c023d212p-1, 0x1.a30db3f9899efp-4},
+ {0x1.920f0303d6809p-1, 0x1.ae001905458fcp-4},
+ {0x1.8f9b698a98b45p-1, 0x1.b8e15a2e3a2cdp-4},
+ {0x1.8d2f6b81726f6p-1, 0x1.c3b1ace2b0996p-4},
+ {0x1.8acae5bb55badp-1, 0x1.ce71456edfa62p-4},
+ {0x1.886db5d9275b8p-1, 0x1.d9205759882c4p-4},
+ {0x1.8617ba567c13cp-1, 0x1.e3bf1513af0dfp-4},
+ {0x1.83c8d27487800p-1, 0x1.ee4db0412c414p-4},
+ {0x1.8180de3c5dbe7p-1, 0x1.f8cc5998de3a5p-4},
+ {0x1.7f3fbe71cdb71p-1, 0x1.019da085eaeb1p-3},
+ {0x1.7d055498071c1p-1, 0x1.06cd4acdb4e3dp-3},
+ {0x1.7ad182e54f65ap-1, 0x1.0bf542bef813fp-3},
+ {0x1.78a42c3c90125p-1, 0x1.11159f14da262p-3},
+ {0x1.767d342f76944p-1, 0x1.162e761c10d1cp-3},
+ {0x1.745c7ef26b00ap-1, 0x1.1b3fddc60d43ep-3},
+ {0x1.7241f15769d0fp-1, 0x1.2049ebac86aa6p-3},
+ {0x1.702d70d396e41p-1, 0x1.254cb4fb7836ap-3},
+ {0x1.6e1ee3700cd11p-1, 0x1.2a484e8d0d252p-3},
+ {0x1.6c162fc9cbe02p-1, 0x1.2f3ccce1c860bp-3}},
+
+ /* Computed from log coeffs div by log(10) then rounded to double
+ precision. */
+ .poly
+ = {-0x1.bcb7b1526e506p-3, 0x1.287a7636be1d1p-3, -0x1.bcb7b158af938p-4,
+ 0x1.63c78734e6d07p-4, -0x1.287461742fee4p-4},
+
+ .invln10 = 0x1.bcb7b1526e50ep-2,
+ .log10_2 = 0x1.34413509f79ffp-2
+
+};
diff --git a/pl/math/v_log10f_3u5.c b/pl/math/v_log10f_3u5.c
new file mode 100644
index 0000000..e9f7f03
--- /dev/null
+++ b/pl/math/v_log10f_3u5.c
@@ -0,0 +1,82 @@
+/*
+ * Single-precision vector log10 function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define P(i) v_f32 (__v_log10f_poly[i])
+
+#define Ln2 v_f32 (0x1.62e43p-1f) /* 0x3f317218. */
+#define InvLn10 v_f32 (0x1.bcb7b2p-2f)
+#define Min v_u32 (0x00800000)
+#define Max v_u32 (0x7f800000)
+#define Mask v_u32 (0x007fffff)
+#define Off v_u32 (0x3f2aaaab) /* 0.666667. */
+
+/* Slow path for V_NAME (log10f). Receives the original input x, the fast-path
+ result y and the special-lane predicate cmp computed by the caller. */
+VPCS_ATTR
+NOINLINE static v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+ /* Fall back to scalar log10f through v_call_f32 (defined in v_math.h;
+ presumably only the lanes set in cmp are recomputed - confirm there). */
+ return v_call_f32 (log10f, x, y, cmp);
+}
+
+/* Our fast implementation of v_log10f uses a similar approach as v_logf.
+ With the same offset as v_logf (i.e., 2/3) it delivers about 3.3ulps with
+ order 9. This is more efficient than using a low order polynomial computed in
+ double precision.
+ Maximum error: 3.305ulps (nearest rounding.)
+ __v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
+ want 0x1.ffe2f4p-4 -0.304916 ulp err 2.80492. */
+VPCS_ATTR
+v_f32_t V_NAME (log10f) (v_f32_t x)
+{
+ v_f32_t n, o, p, q, r, r2, y;
+ v_u32_t u, cmp;
+
+ u = v_as_u32_f32 (x);
+ /* A single unsigned compare flags every bit pattern outside [Min, Max):
+ zero and subnormals lie below Min; inf, nan and every input with the sign
+ bit set lie at or above Max. */
+ cmp = v_cond_u32 (u - Min >= Max - Min);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u -= Off;
+ n = v_to_f32_s32 (v_as_s32_u32 (u) >> 23); /* signextend. */
+ u &= Mask;
+ u += Off;
+ r = v_as_f32_u32 (u) - v_f32 (1.0f);
+
+ /* y = log10(1+r) + n*log10(2). */
+ r2 = r * r;
+ /* (n*ln2 + r)*InvLn10 + r2*(P0 + r*P1 + r2*(P2 + r*P3 + r2*(P4 + r*P5 +
+ r2*(P6+r*P7)))). */
+ o = v_fma_f32 (P (7), r, P (6));
+ p = v_fma_f32 (P (5), r, P (4));
+ q = v_fma_f32 (P (3), r, P (2));
+ y = v_fma_f32 (P (1), r, P (0));
+ p = v_fma_f32 (o, r2, p);
+ q = v_fma_f32 (p, r2, q);
+ y = v_fma_f32 (q, r2, y);
+ /* Using p = Log10(2)*n + r*InvLn(10) is slightly faster
+ but less accurate. */
+ p = v_fma_f32 (Ln2, n, r);
+ y = v_fma_f32 (y, r2, p * InvLn10);
+
+ /* The fast-path result is computed for all lanes; flagged lanes are
+ handed to specialcase together with the untouched input. */
+ if (unlikely (v_any_u32 (cmp)))
+ return specialcase (x, y, cmp);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (V_NAME (log10f), 2.81)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (log10f))
+PL_TEST_INTERVAL (V_NAME (log10f), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (V_NAME (log10f), 0x1p-4, 0x1p4, 500000)
+#endif
diff --git a/pl/math/v_log10f_data.c b/pl/math/v_log10f_data.c
new file mode 100644
index 0000000..537482a
--- /dev/null
+++ b/pl/math/v_log10f_data.c
@@ -0,0 +1,13 @@
+/*
+ * Coefficients for single-precision vector log10 function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+
+const float __v_log10f_poly[] = {
+ /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in
+ [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */
+ -0x1.bcb79cp-3f, 0x1.2879c8p-3f, -0x1.bcd472p-4f, 0x1.6408f8p-4f,
+ -0x1.246f8p-4f, 0x1.f0e514p-5f, -0x1.0fc92cp-4f, 0x1.f5f76ap-5f};
diff --git a/pl/math/v_log1p_2u5.c b/pl/math/v_log1p_2u5.c
new file mode 100644
index 0000000..e482910
--- /dev/null
+++ b/pl/math/v_log1p_2u5.c
@@ -0,0 +1,120 @@
+/*
+ * Double-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define Ln2Hi v_f64 (0x1.62e42fefa3800p-1)
+#define Ln2Lo v_f64 (0x1.ef35793c76730p-45)
+#define HfRt2Top 0x3fe6a09e00000000 /* top32(asuint64(sqrt(2)/2)) << 32. */
+#define OneMHfRt2Top \
+ 0x00095f6200000000 /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) \
+ << 32. */
+#define OneTop12 0x3ff
+#define BottomMask 0xffffffff
+#define AbsMask 0x7fffffffffffffff
+#define C(i) v_f64 (__log1p_data.coeffs[i])
+
+/* Pass f together with f^2, f^4, f^8 and f^16 to ESTRIN_18, using the C(i)
+ coefficient accessor. */
+static inline v_f64_t
+eval_poly (v_f64_t f)
+{
+ v_f64_t f_sq = f * f;
+ v_f64_t f_quad = f_sq * f_sq;
+ v_f64_t f_oct = f_quad * f_quad;
+ return ESTRIN_18 (f, f_sq, f_quad, f_oct, f_oct * f_oct, C);
+}
+
+/* Slow path for V_NAME (log1p). Passes the input x, the fast-path result y and
+ the special-lane predicate to v_call_f64 with scalar log1p (v_call_f64 is
+ defined in v_math.h; presumably it recomputes only flagged lanes). */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t special)
+{
+ return v_call_f64 (log1p, x, y, special);
+}
+
+/* Vector log1p approximation using polynomial on reduced interval. Routine is a
+ modification of the algorithm used in scalar log1p, with no shortcut for k=0
+ and no narrowing for f and k. Maximum observed error is 2.46 ULP:
+ __v_log1p(0x1.654a1307242a4p+11) got 0x1.fd5565fb590f4p+2
+ want 0x1.fd5565fb590f6p+2 . */
+VPCS_ATTR v_f64_t V_NAME (log1p) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t ia = ix & AbsMask;
+ /* Special lanes: |x| is inf or nan, the bit pattern is at or above that of
+ -1.0 (x <= -1, -inf, negative nan), or x is -0. */
+ v_u64_t special
+ = v_cond_u64 ((ia >= v_u64 (0x7ff0000000000000))
+ | (ix >= 0xbff0000000000000) | (ix == 0x8000000000000000));
+
+#if WANT_SIMD_EXCEPT
+ /* Replace special lanes with 0 before the fast path runs; the original
+ bits stay available in ix for the fallback. */
+ if (unlikely (v_any_u64 (special)))
+ x = v_sel_f64 (special, v_f64 (0), x);
+#endif
+
+ /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that t
+ is in [sqrt(2)/2, sqrt(2)]):
+ log1p(x) = k*log(2) + log1p(f).
+
+ f may not be representable exactly, so we need a correction term:
+ let m = round(1 + x), c = (1 + x) - m.
+ c << m: at very small x, log1p(x) ~ x, hence:
+ log(1+x) - log(m) ~ c/m.
+
+ We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
+
+ /* Obtain correctly scaled k by manipulation in the exponent.
+ The scalar algorithm casts down to 32-bit at this point to calculate k and
+ u_red. We stay in double-width to obtain f and k, using the same constants
+ as the scalar algorithm but shifted left by 32. */
+ v_f64_t m = x + 1;
+ v_u64_t mi = v_as_u64_f64 (m);
+ v_u64_t u = mi + OneMHfRt2Top;
+
+ v_s64_t ki = v_as_s64_u64 (u >> 52) - OneTop12;
+ v_f64_t k = v_to_f64_s64 (ki);
+
+ /* Reduce m to t in [sqrt(2)/2, sqrt(2)], then take f = t - 1. The low 32
+ bits of m are carried over unchanged. */
+ v_u64_t utop = (u & 0x000fffff00000000) + HfRt2Top;
+ v_u64_t u_red = utop | (mi & BottomMask);
+ v_f64_t f = v_as_f64_u64 (u_red) - 1;
+
+ /* Correction term c/m. */
+ v_f64_t cm = (x - (m - 1)) / m;
+
+ /* Approximate log1p(x) on the reduced input using a polynomial. Because
+ log1p(0)=0 we choose an approximation of the form:
+ x + C0*x^2 + C1*x^3 + C2x^4 + ...
+ Hence approximation has the form f + f^2 * P(f)
+ where P(x) = C0 + C1*x + C2x^2 + ...
+ Assembling this all correctly is dealt with at the final step. */
+ v_f64_t p = eval_poly (f);
+
+ /* y = f^2 * p + ((k * Ln2Lo + c/m) + (k * Ln2Hi + f)). */
+ v_f64_t ylo = v_fma_f64 (k, Ln2Lo, cm);
+ v_f64_t yhi = v_fma_f64 (k, Ln2Hi, f);
+ v_f64_t y = v_fma_f64 (f * f, p, ylo + yhi);
+
+ /* Pass the original input bits (ix), since x may have been overwritten
+ above when WANT_SIMD_EXCEPT is set. */
+ if (unlikely (v_any_u64 (special)))
+ return specialcase (v_as_f64_u64 (ix), y, special);
+
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (V_NAME (log1p), 1.97)
+PL_TEST_EXPECT_FENV (V_NAME (log1p), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (log1p), -10.0, 10.0, 10000)
+PL_TEST_INTERVAL (V_NAME (log1p), 0.0, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), 0x1p-23, 0.001, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), 0.0, -0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), -0x1p-23, -0.001, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), -0.001, -1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log1p), -1.0, inf, 5000)
+#endif
diff --git a/pl/math/v_log1p_inline.h b/pl/math/v_log1p_inline.h
new file mode 100644
index 0000000..e5c7339
--- /dev/null
+++ b/pl/math/v_log1p_inline.h
@@ -0,0 +1,77 @@
+/*
+ * Helper for vector double-precision routines which calculate log(1 + x) and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#ifndef PL_MATH_V_LOG1P_INLINE_H
+#define PL_MATH_V_LOG1P_INLINE_H
+
+#include "v_math.h"
+#include "pairwise_horner.h"
+
+#define Ln2Hi v_f64 (0x1.62e42fefa3800p-1)
+#define Ln2Lo v_f64 (0x1.ef35793c76730p-45)
+#define HfRt2Top 0x3fe6a09e00000000 /* top32(asuint64(sqrt(2)/2)) << 32. */
+#define OneMHfRt2Top \
+ 0x00095f6200000000 /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) \
+ << 32. */
+#define OneTop 0x3ff
+#define BottomMask 0xffffffff
+#define BigBoundTop 0x5fe /* top12 (asuint64 (0x1p511)). */
+
+#define C(i) v_f64 (__log1p_data.coeffs[i])
+
+static inline v_f64_t
+log1p_inline (v_f64_t x)
+{
+ /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several
+ modifications:
+ - No special-case handling - this should be dealt with by the caller.
+ - Pairwise Horner polynomial evaluation for improved accuracy.
+ - Optionally simulate the shortcut for k=0, used in the scalar routine,
+ using v_sel, for improved accuracy when the argument to log1p is close to
+ 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in
+ the source of the caller before including this file.
+ See v_log1p_2u5.c for details of the algorithm. */
+ v_f64_t m = x + 1;
+ v_u64_t mi = v_as_u64_f64 (m);
+ v_u64_t u = mi + OneMHfRt2Top;
+
+ v_s64_t ki = v_as_s64_u64 (u >> 52) - OneTop;
+ v_f64_t k = v_to_f64_s64 (ki);
+
+ /* Reduce m to t in [sqrt(2)/2, sqrt(2)], then take f = t - 1. */
+ v_u64_t utop = (u & 0x000fffff00000000) + HfRt2Top;
+ v_u64_t u_red = utop | (mi & BottomMask);
+ v_f64_t f = v_as_f64_u64 (u_red) - 1;
+
+ /* Correction term c/m. */
+ v_f64_t cm = (x - (m - 1)) / m;
+
+ /* The includer must define WANT_V_LOG1P_K0_SHORTCUT (as 0 or 1); leaving it
+ undefined is a hard compile error. */
+#ifndef WANT_V_LOG1P_K0_SHORTCUT
+#error \
+ "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_V_LOG1P_K0_SHORTCUT
+ /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+ that the approximation is solely the polynomial. */
+ v_u64_t k0 = k == 0;
+ if (unlikely (v_any_u64 (k0)))
+ {
+ cm = v_sel_f64 (k0, v_f64 (0), cm);
+ f = v_sel_f64 (k0, x, f);
+ }
+#endif
+
+ /* Approximate log1p(f) on the reduced input using a polynomial. */
+ v_f64_t f2 = f * f;
+ v_f64_t p = PAIRWISE_HORNER_18 (f, f2, C);
+
+ /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */
+ v_f64_t ylo = v_fma_f64 (k, Ln2Lo, cm);
+ v_f64_t yhi = v_fma_f64 (k, Ln2Hi, f);
+ return v_fma_f64 (f2, p, ylo + yhi);
+}
+
+#endif // PL_MATH_V_LOG1P_INLINE_H
diff --git a/pl/math/v_log1pf_2u1.c b/pl/math/v_log1pf_2u1.c
new file mode 100644
index 0000000..4a7732b
--- /dev/null
+++ b/pl/math/v_log1pf_2u1.c
@@ -0,0 +1,160 @@
+/*
+ * Single-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define AbsMask 0x7fffffff
+#define TinyBound 0x340 /* asuint32(0x1p-23) >> 20. ulp=0.5 at 0x1p-23. */
+#define MinusOne 0xbf800000
+#define Ln2 (0x1.62e43p-1f)
+#define Four 0x40800000
+#define ThreeQuarters v_u32 (0x3f400000)
+
+#define C(i) v_f32 (__log1pf_data.coeffs[i])
+
+/* Polynomial approximation of log(1+m). The evaluation scheme is selected at
+ compile time by V_LOG1PF_1U3 (Horner) or V_LOG1PF_2U5 (Estrin); building with
+ neither defined is a compile error. Both branches evaluate
+ m + m^2 * (C0 + C1*m + ... + C8*m^8). */
+static inline v_f32_t
+eval_poly (v_f32_t m)
+{
+#ifdef V_LOG1PF_1U3
+
+ /* Approximate log(1+m) on [-0.25, 0.5] using Horner scheme. */
+ v_f32_t p = v_fma_f32 (C (8), m, C (7));
+ p = v_fma_f32 (p, m, C (6));
+ p = v_fma_f32 (p, m, C (5));
+ p = v_fma_f32 (p, m, C (4));
+ p = v_fma_f32 (p, m, C (3));
+ p = v_fma_f32 (p, m, C (2));
+ p = v_fma_f32 (p, m, C (1));
+ p = v_fma_f32 (p, m, C (0));
+ /* m + m * (m * p). */
+ return v_fma_f32 (m, m * p, m);
+
+#elif defined(V_LOG1PF_2U5)
+
+ /* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme. */
+ v_f32_t p_12 = v_fma_f32 (m, C (1), C (0));
+ v_f32_t p_34 = v_fma_f32 (m, C (3), C (2));
+ v_f32_t p_56 = v_fma_f32 (m, C (5), C (4));
+ v_f32_t p_78 = v_fma_f32 (m, C (7), C (6));
+
+ v_f32_t m2 = m * m;
+ /* The linear term m is folded in here. */
+ v_f32_t p_02 = v_fma_f32 (m2, p_12, m);
+ v_f32_t p_36 = v_fma_f32 (m2, p_56, p_34);
+ v_f32_t p_79 = v_fma_f32 (m2, C (8), p_78);
+
+ v_f32_t m4 = m2 * m2;
+ v_f32_t p_06 = v_fma_f32 (m4, p_36, p_02);
+
+ /* p_06 + m^8 * p_79. */
+ return v_fma_f32 (m4, m4 * p_79, p_06);
+
+#else
+#error No precision specified for v_log1pf
+#endif
+}
+
+/* Scalar handler applied to the special lanes of V_NAME (log1pf): returns NaN
+ for NaN inputs and inputs below -1, -Inf for -1, and x itself otherwise.
+ With WANT_SIMD_EXCEPT the NaN/-Inf results come from the __math_invalidf /
+ __math_divzerof helpers instead of plain constants. */
+static inline float
+handle_special (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ if (ix == 0xff800000 || ia > 0x7f800000 || ix > 0xbf800000)
+ {
+ /* x == -Inf => log1pf(x) = NaN.
+ x < -1.0 => log1pf(x) = NaN.
+ x == +/-NaN => log1pf(x) = NaN. */
+#if WANT_SIMD_EXCEPT
+ return __math_invalidf (asfloat (ia));
+#else
+ return NAN;
+#endif
+ }
+ if (ix == 0xbf800000)
+ {
+ /* x == -1.0 => log1pf(x) = -Inf. */
+#if WANT_SIMD_EXCEPT
+ return __math_divzerof (ix);
+#else
+ return -INFINITY;
+#endif
+ }
+ /* Remaining special lanes are |x| < 0x1p-23 (log1pf(x) = x to within
+ rounding) and x == +Inf (log1pf(+Inf) = +Inf): return x unchanged. */
+ return x;
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Accuracy is
+ the same as for the scalar algorithm, i.e. worst-case error when using Estrin
+ is roughly 2.02 ULP:
+ log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
+VPCS_ATTR v_f32_t V_NAME (log1pf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ /* Exponent field of |x|, positioned as the top 12 bits of the word with the
+ sign bit masked off. */
+ v_u32_t ia12 = (ix >> 20) & v_u32 (0x7f8);
+ /* Special lanes: |x| < 0x1p-23 or |x| is inf/nan (first test, a single
+ unsigned range check), or the bit pattern is at or above that of -1.0
+ (x <= -1, -inf, negative nan). */
+ v_u32_t special_cases
+ = v_cond_u32 (ia12 - v_u32 (TinyBound) >= (0x7f8 - TinyBound))
+ | v_cond_u32 (ix >= MinusOne);
+ /* Keep the original input for the fallback, as x may be overwritten
+ below. */
+ v_f32_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (special_cases)))
+ /* Side-step special lanes so fenv exceptions are not triggered
+ inadvertently. */
+ x = v_sel_f32 (special_cases, v_f32 (1), x);
+#endif
+
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+ We approximate log1p(m) with a polynomial, then add k*log(2).
+ Instead of scaling by 2^-k directly, we use an intermediate
+ scale factor s = 4 * 2^-k to ensure the scale is representable
+ as a normalised fp32 number. */
+
+ v_f32_t m = x + v_f32 (1.0f);
+
+ /* Choose k to scale x to the range [-1/4, 1/2]. k is kept in the exponent
+ bit positions (a multiple of 2^23) and may be negative. */
+ v_s32_t k = (v_as_s32_f32 (m) - ThreeQuarters) & v_u32 (0xff800000);
+
+ /* Scale x by exponent manipulation. */
+ v_f32_t m_scale = v_as_f32_u32 (v_as_u32_f32 (x) - v_as_u32_s32 (k));
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number, and scale m down accordingly: 0.25 * s - 1 = 2^-k - 1, so
+ m_scale becomes (x + 1) * 2^-k - 1. */
+ v_f32_t s = v_as_f32_u32 (v_u32 (Four) - k);
+ m_scale = m_scale + v_fma_f32 (v_f32 (0.25f), s, v_f32 (-1.0f));
+
+ /* Evaluate polynomial on the reduced interval. */
+ v_f32_t p = eval_poly (m_scale);
+
+ /* The scale factor to be applied back at the end - by multiplying float(k)
+ by 2^-23 we get the unbiased exponent of k. */
+ v_f32_t scale_back = v_to_f32_s32 (k) * v_f32 (0x1p-23f);
+
+ /* Apply the scaling back. */
+ v_f32_t y = v_fma_f32 (scale_back, v_f32 (Ln2), p);
+
+ if (unlikely (v_any_u32 (special_cases)))
+ return v_call_f32 (handle_special, special_arg, y, special_cases);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (V_NAME (log1pf), 1.53)
+PL_TEST_EXPECT_FENV (V_NAME (log1pf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (log1pf), -10.0, 10.0, 10000)
+PL_TEST_INTERVAL (V_NAME (log1pf), 0.0, 0x1p-23, 30000)
+PL_TEST_INTERVAL (V_NAME (log1pf), 0x1p-23, 0.001, 50000)
+PL_TEST_INTERVAL (V_NAME (log1pf), 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log1pf), 0.0, -0x1p-23, 30000)
+PL_TEST_INTERVAL (V_NAME (log1pf), -0x1p-23, -0.001, 30000)
+PL_TEST_INTERVAL (V_NAME (log1pf), -0.001, -1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log1pf), -1.0, inf, 1000)
+#endif
diff --git a/pl/math/v_log1pf_inline.h b/pl/math/v_log1pf_inline.h
new file mode 100644
index 0000000..e3048e6
--- /dev/null
+++ b/pl/math/v_log1pf_inline.h
@@ -0,0 +1,55 @@
+/*
+ * Helper for single-precision routines which calculate log(1 + x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_V_LOG1PF_INLINE_H
+#define PL_MATH_V_LOG1PF_INLINE_H
+
+#include "v_math.h"
+#include "math_config.h"
+
+#define Four 0x40800000
+#define Ln2 v_f32 (0x1.62e43p-1f)
+
+#define C(i) v_f32 (__log1pf_data.coeffs[i])
+
+/* Polynomial approximation of log(1+m), evaluating
+ m + m^2 * (C0 + C1*m + ... + C8*m^8) with the __log1pf_data coefficients. */
+static inline v_f32_t
+eval_poly (v_f32_t m)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme. */
+ v_f32_t p_12 = v_fma_f32 (m, C (1), C (0));
+ v_f32_t p_34 = v_fma_f32 (m, C (3), C (2));
+ v_f32_t p_56 = v_fma_f32 (m, C (5), C (4));
+ v_f32_t p_78 = v_fma_f32 (m, C (7), C (6));
+
+ v_f32_t m2 = m * m;
+ /* The linear term m is folded in here. */
+ v_f32_t p_02 = v_fma_f32 (m2, p_12, m);
+ v_f32_t p_36 = v_fma_f32 (m2, p_56, p_34);
+ v_f32_t p_79 = v_fma_f32 (m2, C (8), p_78);
+
+ v_f32_t m4 = m2 * m2;
+ v_f32_t p_06 = v_fma_f32 (m4, p_36, p_02);
+
+ /* p_06 + m^8 * p_79. */
+ return v_fma_f32 (m4, m4 * p_79, p_06);
+}
+
+static inline v_f32_t
+log1pf_inline (v_f32_t x)
+{
+ /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
+ special-case handling. See that file for details of the algorithm.
+ The caller is responsible for inputs where x + 1 is not a positive,
+ finite, normal number. */
+ v_f32_t m = x + 1.0f;
+ /* k is the exponent used for the reduction, kept in the exponent bit
+ positions (a multiple of 2^23). Viewed as a signed value it is negative
+ whenever m < 0.75. */
+ v_u32_t k = (v_as_u32_f32 (m) - 0x3f400000) & 0xff800000;
+ /* s = 4 * 2^-k, so 0.25 * s - 1 = 2^-k - 1 and m_scale is
+ (x + 1) * 2^-k - 1. */
+ v_f32_t s = v_as_f32_u32 (v_u32 (Four) - k);
+ v_f32_t m_scale = v_as_f32_u32 (v_as_u32_f32 (x) - k)
+ + v_fma_f32 (v_f32 (0.25f), s, v_f32 (-1.0f));
+ v_f32_t p = eval_poly (m_scale);
+ /* Convert k as a signed integer. An unsigned conversion would turn a
+ negative exponent (x + 1 < 0.75) into a large positive scale and give a
+ wrong result; for k >= 0 both conversions agree, so existing callers
+ that only pass non-negative x are unaffected. */
+ v_f32_t scale_back = v_to_f32_s32 (v_as_s32_u32 (k)) * 0x1.0p-23f;
+ return v_fma_f32 (scale_back, Ln2, p);
+}
+
+#endif // PL_MATH_V_LOG1PF_INLINE_H
diff --git a/pl/math/v_log2_3u.c b/pl/math/v_log2_3u.c
new file mode 100644
index 0000000..fac73f6
--- /dev/null
+++ b/pl/math/v_log2_3u.c
@@ -0,0 +1,100 @@
+/*
+ * Double-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "include/mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define InvLn2 v_f64 (0x1.71547652b82fep0)
+#define N (1 << V_LOG2_TABLE_BITS)
+#define OFF v_u64 (0x3fe6900900000000)
+#define P(i) v_f64 (__v_log2_data.poly[i])
+
+struct entry
+{
+ v_f64_t invc;
+ v_f64_t log2c;
+};
+
+/* Fetch the invc and log2c fields of __v_log2_data.tab for index i. The SCALAR
+ build reads a single entry; otherwise lanes 0 and 1 are filled one by one. */
+static inline struct entry
+lookup (v_u64_t i)
+{
+ struct entry ent;
+#ifdef SCALAR
+ ent.invc = __v_log2_data.tab[i].invc;
+ ent.log2c = __v_log2_data.tab[i].log2c;
+#else
+ ent.invc[0] = __v_log2_data.tab[i[0]].invc;
+ ent.invc[1] = __v_log2_data.tab[i[1]].invc;
+ ent.log2c[0] = __v_log2_data.tab[i[0]].log2c;
+ ent.log2c[1] = __v_log2_data.tab[i[1]].log2c;
+#endif
+ return ent;
+}
+
+/* Slow path for V_NAME (log2). Passes the input x, the fast-path result y and
+ the special-lane predicate cmp to v_call_f64 with scalar log2 (v_call_f64 is
+ defined in v_math.h; presumably it recomputes only flagged lanes). */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
+{
+ return v_call_f64 (log2, x, y, cmp);
+}
+
+/* Double-precision vector log2 routine. Implements the same algorithm as vector
+ log10, with coefficients and table entries scaled in extended precision.
+ The maximum observed error is 2.58 ULP:
+ __v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+ want 0x1.fffb34198d9ddp-5. */
+VPCS_ATTR
+v_f64_t V_NAME (log2) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t top = ix >> 48;
+ /* A single unsigned range check on the top 16 bits flags zero and
+ subnormals (top < 0x0010) as well as inf, nan and every input with the
+ sign bit set (top >= 0x7ff0). */
+ v_u64_t special
+ = v_cond_u64 (top - v_u64 (0x0010) >= v_u64 (0x7ff0 - 0x0010));
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ v_u64_t tmp = ix - OFF;
+ v_u64_t i = (tmp >> (52 - V_LOG2_TABLE_BITS)) % N;
+ v_s64_t k = v_as_s64_u64 (tmp) >> 52; /* arithmetic shift. */
+ /* Remove the exponent part of tmp (k << 52) from ix to obtain z. */
+ v_u64_t iz = ix - (tmp & v_u64 (0xfffULL << 52));
+ v_f64_t z = v_as_f64_u64 (iz);
+ struct entry e = lookup (i);
+
+ /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */
+
+ v_f64_t r = v_fma_f64 (z, e.invc, v_f64 (-1.0));
+ v_f64_t kd = v_to_f64_s64 (k);
+ v_f64_t w = v_fma_f64 (r, InvLn2, e.log2c);
+
+ /* y = (k + w) + r2 * (P0 + r*P1 + r2 * (P2 + r*P3 + r2*P4)). */
+ v_f64_t r2 = r * r;
+ v_f64_t p_23 = v_fma_f64 (P (3), r, P (2));
+ v_f64_t p_01 = v_fma_f64 (P (1), r, P (0));
+ v_f64_t y = v_fma_f64 (P (4), r2, p_23);
+ y = v_fma_f64 (r2, y, p_01);
+ y = v_fma_f64 (r2, y, kd + w);
+
+ if (unlikely (v_any_u64 (special)))
+ return specialcase (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (V_NAME (log2), 2.09)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (log2))
+PL_TEST_INTERVAL (V_NAME (log2), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (V_NAME (log2), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME (log2), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME (log2), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log2), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME (log2), 100, inf, 50000)
+#endif
diff --git a/pl/math/v_log2_data.c b/pl/math/v_log2_data.c
new file mode 100644
index 0000000..2a1da68
--- /dev/null
+++ b/pl/math/v_log2_data.c
@@ -0,0 +1,155 @@
+/*
+ * Coefficients and table entries for vector log2
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << V_LOG2_TABLE_BITS)
+
+// clang-format off
+
+const struct v_log2_data __v_log2_data = {
+
+/* Derived from the coefficients in log_data.c for N == 128 && LOG_POLY_ORDER == 6.
+ Each coefficient was scaled by log2(e) in extended precision and rounded back to
+ double. */
+.poly = { -0x1.71547652b83p-1, 0x1.ec709dc340953p-2, -0x1.71547651c8f35p-2,
+ 0x1.2777ebe12dda5p-2, -0x1.ec738d616fe26p-3 },
+
+/* Derived from the table in v_log10_data.c. invc is unchanged. log2(c) was
+ calculated by scaling log10(c) by log2(10) in extended precision and rounding
+ back. */
+.tab = {
+{ 0x1.6a133d0dec120p+0, -0x1.00130d57f5fadp-1 },
+{ 0x1.6815f2f3e42edp+0, -0x1.f802661bd725ep-2 },
+{ 0x1.661e39be1ac9ep+0, -0x1.efea1c6f73a5bp-2 },
+{ 0x1.642bfa30ac371p+0, -0x1.e7dd1dcd06f05p-2 },
+{ 0x1.623f1d916f323p+0, -0x1.dfdb4ae024809p-2 },
+{ 0x1.60578da220f65p+0, -0x1.d7e484d101958p-2 },
+{ 0x1.5e75349dea571p+0, -0x1.cff8ad452f6ep-2 },
+{ 0x1.5c97fd387a75ap+0, -0x1.c817a666c997fp-2 },
+{ 0x1.5abfd2981f200p+0, -0x1.c04152d640419p-2 },
+{ 0x1.58eca051dc99cp+0, -0x1.b87595a3f64b2p-2 },
+{ 0x1.571e526d9df12p+0, -0x1.b0b4526c44d07p-2 },
+{ 0x1.5554d555b3fcbp+0, -0x1.a8fd6d1a90f5ep-2 },
+{ 0x1.539015e2a20cdp+0, -0x1.a150ca2559fc6p-2 },
+{ 0x1.51d0014ee0164p+0, -0x1.99ae4e62cca29p-2 },
+{ 0x1.50148538cd9eep+0, -0x1.9215df1a1e842p-2 },
+{ 0x1.4e5d8f9f698a1p+0, -0x1.8a8761fe1f0d9p-2 },
+{ 0x1.4cab0edca66bep+0, -0x1.8302bd1cc9a54p-2 },
+{ 0x1.4afcf1a9db874p+0, -0x1.7b87d6fb437f6p-2 },
+{ 0x1.495327136e16fp+0, -0x1.741696673a86dp-2 },
+{ 0x1.47ad9e84af28fp+0, -0x1.6caee2b3c6fe4p-2 },
+{ 0x1.460c47b39ae15p+0, -0x1.6550a3666c27ap-2 },
+{ 0x1.446f12b278001p+0, -0x1.5dfbc08de02a4p-2 },
+{ 0x1.42d5efdd720ecp+0, -0x1.56b022766c84ap-2 },
+{ 0x1.4140cfe001a0fp+0, -0x1.4f6db1c955536p-2 },
+{ 0x1.3fafa3b421f69p+0, -0x1.4834579063054p-2 },
+{ 0x1.3e225c9c8ece5p+0, -0x1.4103fd2249a76p-2 },
+{ 0x1.3c98ec29a211ap+0, -0x1.39dc8c3fe6dabp-2 },
+{ 0x1.3b13442a413fep+0, -0x1.32bdeed4b5c8fp-2 },
+{ 0x1.399156baa3c54p+0, -0x1.2ba80f41e20ddp-2 },
+{ 0x1.38131639b4cdbp+0, -0x1.249ad8332f4a7p-2 },
+{ 0x1.36987540fbf53p+0, -0x1.1d96347e7f3ebp-2 },
+{ 0x1.352166b648f61p+0, -0x1.169a0f7d6604ap-2 },
+{ 0x1.33adddb3eb575p+0, -0x1.0fa654a221909p-2 },
+{ 0x1.323dcd99fc1d3p+0, -0x1.08baefcf8251ap-2 },
+{ 0x1.30d129fefc7d2p+0, -0x1.01d7cd14deecdp-2 },
+{ 0x1.2f67e6b72fe7dp+0, -0x1.f5f9b1ad55495p-3 },
+{ 0x1.2e01f7cf8b187p+0, -0x1.e853ff76a77afp-3 },
+{ 0x1.2c9f518ddc86ep+0, -0x1.dabe5d624cba1p-3 },
+{ 0x1.2b3fe86e5f413p+0, -0x1.cd38a5cef4822p-3 },
+{ 0x1.29e3b1211b25cp+0, -0x1.bfc2b38d315f9p-3 },
+{ 0x1.288aa08b373cfp+0, -0x1.b25c61f5edd0fp-3 },
+{ 0x1.2734abcaa8467p+0, -0x1.a5058d18e9cacp-3 },
+{ 0x1.25e1c82459b81p+0, -0x1.97be1113e47a3p-3 },
+{ 0x1.2491eb1ad59c5p+0, -0x1.8a85cafdf5e27p-3 },
+{ 0x1.23450a54048b5p+0, -0x1.7d5c97e8fc45bp-3 },
+{ 0x1.21fb1bb09e578p+0, -0x1.704255d6486e4p-3 },
+{ 0x1.20b415346d8f7p+0, -0x1.6336e2cedd7bfp-3 },
+{ 0x1.1f6fed179a1acp+0, -0x1.563a1d9b0cc6ap-3 },
+{ 0x1.1e2e99b93c7b3p+0, -0x1.494be541aaa6fp-3 },
+{ 0x1.1cf011a7a882ap+0, -0x1.3c6c1964dd0f2p-3 },
+{ 0x1.1bb44b97dba5ap+0, -0x1.2f9a99f19a243p-3 },
+{ 0x1.1a7b3e66cdd4fp+0, -0x1.22d747344446p-3 },
+{ 0x1.1944e11dc56cdp+0, -0x1.1622020d4f7f5p-3 },
+{ 0x1.18112aebb1a6ep+0, -0x1.097aabb3553f3p-3 },
+{ 0x1.16e013231b7e9p+0, -0x1.f9c24b48014c5p-4 },
+{ 0x1.15b1913f156cfp+0, -0x1.e0aaa3bdc858ap-4 },
+{ 0x1.14859cdedde13p+0, -0x1.c7ae257c952d6p-4 },
+{ 0x1.135c2dc68cfa4p+0, -0x1.aecc960a03e58p-4 },
+{ 0x1.12353bdb01684p+0, -0x1.9605bb724d541p-4 },
+{ 0x1.1110bf25b85b4p+0, -0x1.7d595ca7147cep-4 },
+{ 0x1.0feeafd2f8577p+0, -0x1.64c74165002d9p-4 },
+{ 0x1.0ecf062c51c3bp+0, -0x1.4c4f31c86d344p-4 },
+{ 0x1.0db1baa076c8bp+0, -0x1.33f0f70388258p-4 },
+{ 0x1.0c96c5bb3048ep+0, -0x1.1bac5abb3037dp-4 },
+{ 0x1.0b7e20263e070p+0, -0x1.0381272495f21p-4 },
+{ 0x1.0a67c2acd0ce3p+0, -0x1.d6de4eba2de2ap-5 },
+{ 0x1.0953a6391e982p+0, -0x1.a6ec4e8156898p-5 },
+{ 0x1.0841c3caea380p+0, -0x1.772be542e3e1bp-5 },
+{ 0x1.07321489b13eap+0, -0x1.479cadcde852dp-5 },
+{ 0x1.062491aee9904p+0, -0x1.183e4265faa5p-5 },
+{ 0x1.05193497a7cc5p+0, -0x1.d2207fdaa1b85p-6 },
+{ 0x1.040ff6b5f5e9fp+0, -0x1.742486cb4a6a2p-6 },
+{ 0x1.0308d19aa6127p+0, -0x1.1687d77cfc299p-6 },
+{ 0x1.0203beedb0c67p+0, -0x1.7293623a6b5dep-7 },
+{ 0x1.010037d38bcc2p+0, -0x1.70ec80ec8f25dp-8 },
+{ 1.0, 0.0 },
+{ 0x1.fc06d493cca10p-1, 0x1.704c1ca6b6bc9p-7 },
+{ 0x1.f81e6ac3b918fp-1, 0x1.6eac8ba664beap-6 },
+{ 0x1.f44546ef18996p-1, 0x1.11e67d040772dp-5 },
+{ 0x1.f07b10382c84bp-1, 0x1.6bc665e2105dep-5 },
+{ 0x1.ecbf7070e59d4p-1, 0x1.c4f8a9772bf1dp-5 },
+{ 0x1.e91213f715939p-1, 0x1.0ebff10fbb951p-4 },
+{ 0x1.e572a9a75f7b7p-1, 0x1.3aaf4d7805d11p-4 },
+{ 0x1.e1e0e2c530207p-1, 0x1.664ba81a4d717p-4 },
+{ 0x1.de5c72d8a8be3p-1, 0x1.9196387da6de4p-4 },
+{ 0x1.dae50fa5658ccp-1, 0x1.bc902f2b7796p-4 },
+{ 0x1.d77a71145a2dap-1, 0x1.e73ab5f584f28p-4 },
+{ 0x1.d41c51166623ep-1, 0x1.08cb78510d232p-3 },
+{ 0x1.d0ca6ba0bb29fp-1, 0x1.1dd2fe2f0dcb5p-3 },
+{ 0x1.cd847e8e59681p-1, 0x1.32b4784400df4p-3 },
+{ 0x1.ca4a499693e00p-1, 0x1.47706f3d49942p-3 },
+{ 0x1.c71b8e399e821p-1, 0x1.5c0768ee4a4dcp-3 },
+{ 0x1.c3f80faf19077p-1, 0x1.7079e86fc7c6dp-3 },
+{ 0x1.c0df92dc2b0ecp-1, 0x1.84c86e1183467p-3 },
+{ 0x1.bdd1de3cbb542p-1, 0x1.98f377a34b499p-3 },
+{ 0x1.baceb9e1007a3p-1, 0x1.acfb803bc924bp-3 },
+{ 0x1.b7d5ef543e55ep-1, 0x1.c0e10098b025fp-3 },
+{ 0x1.b4e749977d953p-1, 0x1.d4a46efe103efp-3 },
+{ 0x1.b20295155478ep-1, 0x1.e8463f45b8d0bp-3 },
+{ 0x1.af279f8e82be2p-1, 0x1.fbc6e3228997fp-3 },
+{ 0x1.ac5638197fdf3p-1, 0x1.079364f2e5aa8p-2 },
+{ 0x1.a98e2f102e087p-1, 0x1.1133306010a63p-2 },
+{ 0x1.a6cf5606d05c1p-1, 0x1.1ac309631bd17p-2 },
+{ 0x1.a4197fc04d746p-1, 0x1.24432485370c1p-2 },
+{ 0x1.a16c80293dc01p-1, 0x1.2db3b5449132fp-2 },
+{ 0x1.9ec82c4dc5bc9p-1, 0x1.3714ee1d7a32p-2 },
+{ 0x1.9c2c5a491f534p-1, 0x1.406700ab52c94p-2 },
+{ 0x1.9998e1480b618p-1, 0x1.49aa1d87522b2p-2 },
+{ 0x1.970d9977c6c2dp-1, 0x1.52de746d7ecb2p-2 },
+{ 0x1.948a5c023d212p-1, 0x1.5c0434336b343p-2 },
+{ 0x1.920f0303d6809p-1, 0x1.651b8ad6c90d1p-2 },
+{ 0x1.8f9b698a98b45p-1, 0x1.6e24a56ab5831p-2 },
+{ 0x1.8d2f6b81726f6p-1, 0x1.771fb04ec29b1p-2 },
+{ 0x1.8acae5bb55badp-1, 0x1.800cd6f19c25ep-2 },
+{ 0x1.886db5d9275b8p-1, 0x1.88ec441df11dfp-2 },
+{ 0x1.8617ba567c13cp-1, 0x1.91be21b7c93f5p-2 },
+{ 0x1.83c8d27487800p-1, 0x1.9a8298f8c7454p-2 },
+{ 0x1.8180de3c5dbe7p-1, 0x1.a339d255c04ddp-2 },
+{ 0x1.7f3fbe71cdb71p-1, 0x1.abe3f59f43db7p-2 },
+{ 0x1.7d055498071c1p-1, 0x1.b48129deca9efp-2 },
+{ 0x1.7ad182e54f65ap-1, 0x1.bd119575364c1p-2 },
+{ 0x1.78a42c3c90125p-1, 0x1.c5955e23ebcbcp-2 },
+{ 0x1.767d342f76944p-1, 0x1.ce0ca8f4e1557p-2 },
+{ 0x1.745c7ef26b00ap-1, 0x1.d6779a5a75774p-2 },
+{ 0x1.7241f15769d0fp-1, 0x1.ded6563550d27p-2 },
+{ 0x1.702d70d396e41p-1, 0x1.e728ffafd840ep-2 },
+{ 0x1.6e1ee3700cd11p-1, 0x1.ef6fb96c8d739p-2 },
+{ 0x1.6c162fc9cbe02p-1, 0x1.f7aaa57907219p-2 }}
+};
+// clang-format on
diff --git a/pl/math/v_log2f_2u5.c b/pl/math/v_log2f_2u5.c
new file mode 100644
index 0000000..8f9241b
--- /dev/null
+++ b/pl/math/v_log2f_2u5.c
@@ -0,0 +1,68 @@
+/*
+ * Single-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pairwise_hornerf.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#if V_SUPPORTED
+
+#define C(i) v_f32 (__v_log2f_data.poly[i])
+
+#define Ln2 v_f32 (0x1.62e43p-1f) /* 0x3f317218 */
+#define Min v_u32 (0x00800000)
+#define Max v_u32 (0x7f800000)
+#define Mask v_u32 (0x007fffff)
+#define Off v_u32 (0x3f2aaaab) /* 0.666667 */
+
+/* Slow path for V_NAME (log2f). Receives the original input x, the fast-path
+ result y and the special-lane predicate cmp computed by the caller. */
+VPCS_ATTR
+NOINLINE static v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+ /* Fall back to scalar log2f through v_call_f32 (defined in v_math.h;
+ presumably only the lanes set in cmp are recomputed - confirm there). */
+ return v_call_f32 (log2f, x, y, cmp);
+}
+
+/* Fast implementation for single precision log2,
+ relies on same argument reduction as Neon logf.
+ Maximum error: 2.48 ULPs
+ __v_log2f(0x1.558174p+0) got 0x1.a9be84p-2
+ want 0x1.a9be8p-2. */
+VPCS_ATTR
+v_f32_t V_NAME (log2f) (v_f32_t x)
+{
+ v_u32_t u = v_as_u32_f32 (x);
+ /* A single unsigned compare flags every bit pattern outside [Min, Max):
+ zero and subnormals lie below Min; inf, nan and every input with the sign
+ bit set lie at or above Max. */
+ v_u32_t cmp = v_cond_u32 (u - Min >= Max - Min);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u -= Off;
+ v_f32_t n = v_to_f32_s32 (v_as_s32_u32 (u) >> 23); /* signextend. */
+ u &= Mask;
+ u += Off;
+ v_f32_t r = v_as_f32_u32 (u) - v_f32 (1.0f);
+
+ /* y = log2(1+r) + n. The polynomial p is evaluated with PAIRWISE_HORNER_8
+ over the __v_log2f_data.poly coefficients and combined as p * r + n. */
+ v_f32_t r2 = r * r;
+ v_f32_t p = PAIRWISE_HORNER_8 (r, r2, C);
+ v_f32_t y = v_fma_f32 (p, r, n);
+
+ if (unlikely (v_any_u32 (cmp)))
+ return specialcase (x, y, cmp);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (V_NAME (log2f), 1.99)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (log2f))
+PL_TEST_INTERVAL (V_NAME (log2f), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (V_NAME (log2f), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME (log2f), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME (log2f), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME (log2f), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME (log2f), 100, inf, 50000)
+#endif
diff --git a/pl/math/v_log2f_data.c b/pl/math/v_log2f_data.c
new file mode 100644
index 0000000..b144e8f
--- /dev/null
+++ b/pl/math/v_log2f_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients for vector log2f
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* See tools/v_log2f.sollya for the algorithm used to generate these
+ coefficients. */
+const struct v_log2f_data __v_log2f_data
+ = {.poly = {0x1.715476p0f, /* (float)(1 / ln(2)). */
+ -0x1.715458p-1f, 0x1.ec701cp-2f, -0x1.7171a4p-2f, 0x1.27a0b8p-2f,
+ -0x1.e5143ep-3f, 0x1.9d8ecap-3f, -0x1.c675bp-3f, 0x1.9e495p-3f}};
diff --git a/pl/math/v_math.h b/pl/math/v_math.h
new file mode 100644
index 0000000..a8fa091
--- /dev/null
+++ b/pl/math/v_math.h
@@ -0,0 +1,855 @@
+/*
+ * Vector math abstractions.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _V_MATH_H
+#define _V_MATH_H
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code. */
+# define WANT_VMATH 1
+#endif
+#if WANT_VMATH
+
+/* The goal of this header is to allow vector (only Neon for now)
+ and scalar build of the same algorithm. */
+
+#if SCALAR
+#define V_NAME(x) __s_##x
+#elif VPCS && __aarch64__
+#define V_NAME(x) __vn_##x
+#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
+#else
+#define V_NAME(x) __v_##x
+#endif
+
+#ifndef VPCS_ATTR
+#define VPCS_ATTR
+#endif
+#ifndef VPCS_ALIAS
+#define VPCS_ALIAS
+#endif
+
+#include <stdint.h>
+#include "math_config.h"
+
+typedef float f32_t;
+typedef uint32_t u32_t;
+typedef int32_t s32_t;
+typedef double f64_t;
+typedef uint64_t u64_t;
+typedef int64_t s64_t;
+
+/* reinterpret as type1 from type2. */
+static inline u32_t
+as_u32_f32 (f32_t x)
+{
+ union { f32_t f; u32_t u; } r = {x};
+ return r.u;
+}
+static inline f32_t
+as_f32_u32 (u32_t x)
+{
+ union { u32_t u; f32_t f; } r = {x};
+ return r.f;
+}
+static inline s32_t
+as_s32_u32 (u32_t x)
+{
+ union { u32_t u; s32_t i; } r = {x};
+ return r.i;
+}
+static inline u32_t
+as_u32_s32 (s32_t x)
+{
+ union { s32_t i; u32_t u; } r = {x};
+ return r.u;
+}
+static inline u64_t
+as_u64_f64 (f64_t x)
+{
+ union { f64_t f; u64_t u; } r = {x};
+ return r.u;
+}
+static inline f64_t
+as_f64_u64 (u64_t x)
+{
+ union { u64_t u; f64_t f; } r = {x};
+ return r.f;
+}
+static inline s64_t
+as_s64_u64 (u64_t x)
+{
+ union { u64_t u; s64_t i; } r = {x};
+ return r.i;
+}
+static inline u64_t
+as_u64_s64 (s64_t x)
+{
+ union { s64_t i; u64_t u; } r = {x};
+ return r.u;
+}
+
+#if SCALAR
+#define V_SUPPORTED 1
+typedef f32_t v_f32_t;
+typedef u32_t v_u32_t;
+typedef s32_t v_s32_t;
+typedef f64_t v_f64_t;
+typedef u64_t v_u64_t;
+typedef s64_t v_s64_t;
+
+static inline int
+v_lanes32 (void)
+{
+ return 1;
+}
+
+static inline v_f32_t
+v_f32 (f32_t x)
+{
+ return x;
+}
+static inline v_u32_t
+v_u32 (u32_t x)
+{
+ return x;
+}
+static inline v_s32_t
+v_s32 (s32_t x)
+{
+ return x;
+}
+
+static inline f32_t
+v_get_f32 (v_f32_t x, int i)
+{
+ return x;
+}
+static inline u32_t
+v_get_u32 (v_u32_t x, int i)
+{
+ return x;
+}
+static inline s32_t
+v_get_s32 (v_s32_t x, int i)
+{
+ return x;
+}
+
+static inline void
+v_set_f32 (v_f32_t *x, int i, f32_t v)
+{
+ *x = v;
+}
+static inline void
+v_set_u32 (v_u32_t *x, int i, u32_t v)
+{
+ *x = v;
+}
+static inline void
+v_set_s32 (v_s32_t *x, int i, s32_t v)
+{
+ *x = v;
+}
+
+/* true if any elements of a v_cond result is non-zero. */
+static inline int
+v_any_u32 (v_u32_t x)
+{
+ return x != 0;
+}
+/* to wrap the result of relational operators. */
+static inline v_u32_t
+v_cond_u32 (v_u32_t x)
+{
+ return x ? -1 : 0;
+}
+static inline v_f32_t
+v_abs_f32 (v_f32_t x)
+{
+ return __builtin_fabsf (x);
+}
+/* Bitwise select: take the bits of x where m is set and the bits of y
+   everywhere else.  */
+static inline v_u32_t
+v_bsl_u32 (v_u32_t m, v_u32_t x, v_u32_t y)
+{
+  v_u32_t from_x = x & m;
+  v_u32_t from_y = y & ~m;
+  return from_x | from_y;
+}
+/* to wrap |x| > |y|.  Returns an all-ones mask when the comparison holds and
+   zero otherwise, matching what v_cond_u32 produces, so the result can be
+   used with bitwise selects as well as with v_sel/v_any.  */
+static inline v_u32_t
+v_cagt_f32 (v_f32_t x, v_f32_t y)
+{
+  return fabsf (x) > fabsf (y) ? -1 : 0;
+}
+/* to wrap |x| >= |y|.  Returns an all-ones mask when the comparison holds
+   and zero otherwise, matching what v_cond_u32 produces, so the result can
+   be used with bitwise selects as well as with v_sel/v_any.  */
+static inline v_u32_t
+v_cage_f32 (v_f32_t x, v_f32_t y)
+{
+  return fabsf (x) >= fabsf (y) ? -1 : 0;
+}
+/* to wrap |x| < |y|.  Returns an all-ones mask when the comparison holds and
+   zero otherwise, matching what v_cond_u32 produces, so the result can be
+   used with bitwise selects as well as with v_sel/v_any.  */
+static inline v_u32_t
+v_calt_f32 (v_f32_t x, v_f32_t y)
+{
+  return fabsf (x) < fabsf (y) ? -1 : 0;
+}
+static inline v_f32_t
+v_div_f32 (v_f32_t x, v_f32_t y)
+{
+ return x / y;
+}
+static inline v_f32_t
+v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
+{
+ return __builtin_fmaf (x, y, z);
+}
+static inline v_f32_t
+v_round_f32 (v_f32_t x)
+{
+ return __builtin_roundf (x);
+}
+static inline v_s32_t
+v_round_s32 (v_f32_t x)
+{
+ return __builtin_lroundf (x); /* relies on -fno-math-errno. */
+}
+static inline v_f32_t
+v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y)
+{
+ return p ? x : y;
+}
+static inline v_u32_t
+v_sel_u32 (v_u32_t p, v_u32_t x, v_u32_t y)
+{
+ return p ? x : y;
+}
+static inline v_f32_t
+v_sqrt_f32 (v_f32_t x)
+{
+ return __builtin_sqrtf (x);
+}
+/* convert to type1 from type2. */
+static inline v_f32_t
+v_to_f32_s32 (v_s32_t x)
+{
+ return x;
+}
+static inline v_s32_t
+v_to_s32_f32 (v_f32_t x)
+{
+ return x;
+}
+static inline v_f32_t
+v_to_f32_u32 (v_u32_t x)
+{
+ return x;
+}
+/* reinterpret as type1 from type2. */
+static inline v_u32_t
+v_as_u32_f32 (v_f32_t x)
+{
+ union { v_f32_t f; v_u32_t u; } r = {x};
+ return r.u;
+}
+/* Reinterpret the bits of a float as a signed 32-bit integer.  */
+static inline v_s32_t
+v_as_s32_f32 (v_f32_t x)
+{
+  union { v_f32_t f; v_s32_t i; } pun = {x};
+  return pun.i;
+}
+static inline v_f32_t
+v_as_f32_u32 (v_u32_t x)
+{
+ union { v_u32_t u; v_f32_t f; } r = {x};
+ return r.f;
+}
+static inline v_s32_t
+v_as_s32_u32 (v_u32_t x)
+{
+ union { v_u32_t u; v_s32_t i; } r = {x};
+ return r.i;
+}
+static inline v_u32_t
+v_as_u32_s32 (v_s32_t x)
+{
+ union { v_s32_t i; v_u32_t u; } r = {x};
+ return r.u;
+}
+static inline v_f32_t
+v_lookup_f32 (const f32_t *tab, v_u32_t idx)
+{
+ return tab[idx];
+}
+static inline v_u32_t
+v_lookup_u32 (const u32_t *tab, v_u32_t idx)
+{
+ return tab[idx];
+}
+/* Scalar build: apply f to x unconditionally.  The fallback value y and the
+   predicate p are part of the signature but are not consulted.  */
+static inline v_f32_t
+v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
+{
+  (void) y;
+  (void) p;
+  return f (x);
+}
+/* Scalar build: apply the two-argument f to (x1, x2) unconditionally.  The
+   fallback value y and the predicate p are not consulted.  */
+static inline v_f32_t
+v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
+             v_u32_t p)
+{
+  (void) y;
+  (void) p;
+  return f (x1, x2);
+}
+
+static inline int
+v_lanes64 (void)
+{
+ return 1;
+}
+static inline v_f64_t
+v_f64 (f64_t x)
+{
+ return x;
+}
+static inline v_u64_t
+v_u64 (u64_t x)
+{
+ return x;
+}
+static inline v_s64_t
+v_s64 (s64_t x)
+{
+ return x;
+}
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+ return x;
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+ *x = v;
+}
+/* true if any elements of a v_cond result is non-zero. */
+static inline int
+v_any_u64 (v_u64_t x)
+{
+ return x != 0;
+}
+/* true if all elements of a v_cond result is non-zero.  In the scalar build
+   there is a single element.  */
+static inline int
+v_all_u64 (v_u64_t x)
+{
+  /* Compare explicitly: converting the 64-bit value straight to int keeps
+     only its low bits, so a non-zero x whose low 32 bits are clear would
+     otherwise read as false.  */
+  return x != 0;
+}
+/* to wrap the result of relational operators. */
+static inline v_u64_t
+v_cond_u64 (v_u64_t x)
+{
+ return x ? -1 : 0;
+}
+static inline v_f64_t
+v_abs_f64 (v_f64_t x)
+{
+ return __builtin_fabs (x);
+}
+/* Bitwise select: take the bits of x where m is set and the bits of y
+   everywhere else.  */
+static inline v_u64_t
+v_bsl_u64 (v_u64_t m, v_u64_t x, v_u64_t y)
+{
+  v_u64_t from_x = x & m;
+  v_u64_t from_y = y & ~m;
+  return from_x | from_y;
+}
+/* to wrap |x| > |y|.  Returns an all-ones mask when the comparison holds and
+   zero otherwise, matching what v_cond_u64 produces, so the result can be
+   used with bitwise selects as well as with v_sel/v_any.  */
+static inline v_u64_t
+v_cagt_f64 (v_f64_t x, v_f64_t y)
+{
+  return fabs (x) > fabs (y) ? -1 : 0;
+}
+static inline v_f64_t
+v_div_f64 (v_f64_t x, v_f64_t y)
+{
+ return x / y;
+}
+static inline v_f64_t
+v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
+{
+ return __builtin_fma (x, y, z);
+}
+/* Smaller of x and y.  y is returned whenever x < y does not hold, which
+   includes the case where the comparison involves a NaN.  */
+static inline v_f64_t
+v_min_f64 (v_f64_t x, v_f64_t y)
+{
+  if (x < y)
+    return x;
+  return y;
+}
+static inline v_f64_t
+v_round_f64 (v_f64_t x)
+{
+ return __builtin_round (x);
+}
+static inline v_f64_t
+v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y)
+{
+ return p ? x : y;
+}
+static inline v_f64_t
+v_sqrt_f64 (v_f64_t x)
+{
+ return __builtin_sqrt (x);
+}
+static inline v_s64_t
+v_round_s64 (v_f64_t x)
+{
+ return __builtin_lround (x); /* relies on -fno-math-errno. */
+}
+static inline v_u64_t
+v_trunc_u64 (v_f64_t x)
+{
+ return __builtin_trunc (x);
+}
+/* convert to type1 from type2. */
+static inline v_f64_t
+v_to_f64_s64 (v_s64_t x)
+{
+ return x;
+}
+static inline v_f64_t
+v_to_f64_u64 (v_u64_t x)
+{
+ return x;
+}
+
+static inline v_s64_t
+v_to_s64_f64 (v_f64_t x)
+{
+ return x;
+}
+/* reinterpret as type1 from type2. */
+static inline v_u64_t
+v_as_u64_f64 (v_f64_t x)
+{
+ union { v_f64_t f; v_u64_t u; } r = {x};
+ return r.u;
+}
+static inline v_f64_t
+v_as_f64_u64 (v_u64_t x)
+{
+ union { v_u64_t u; v_f64_t f; } r = {x};
+ return r.f;
+}
+static inline v_s64_t
+v_as_s64_u64 (v_u64_t x)
+{
+ union { v_u64_t u; v_s64_t i; } r = {x};
+ return r.i;
+}
+static inline v_u64_t
+v_as_u64_s64 (v_s64_t x)
+{
+ union { v_s64_t i; v_u64_t u; } r = {x};
+ return r.u;
+}
+static inline v_f64_t
+v_lookup_f64 (const f64_t *tab, v_u64_t idx)
+{
+ return tab[idx];
+}
+static inline v_u64_t
+v_lookup_u64 (const u64_t *tab, v_u64_t idx)
+{
+ return tab[idx];
+}
+/* Scalar build: apply f to x unconditionally.  The fallback value y and the
+   predicate p are part of the signature but are not consulted.  */
+static inline v_f64_t
+v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
+{
+  (void) y;
+  (void) p;
+  return f (x);
+}
+/* Scalar build: apply the two-argument f to (x1, x2) unconditionally.  The
+   fallback value y and the predicate p are not consulted.  */
+static inline v_f64_t
+v_call2_f64 (f64_t (*f) (f64_t, f64_t), v_f64_t x1, v_f64_t x2, v_f64_t y,
+             v_u64_t p)
+{
+  (void) y;
+  (void) p;
+  return f (x1, x2);
+}
+
+#elif __aarch64__
+#define V_SUPPORTED 1
+#include <arm_neon.h>
+typedef float32x4_t v_f32_t;
+typedef uint32x4_t v_u32_t;
+typedef int32x4_t v_s32_t;
+typedef float64x2_t v_f64_t;
+typedef uint64x2_t v_u64_t;
+typedef int64x2_t v_s64_t;
+
+static inline int
+v_lanes32 (void)
+{
+ return 4;
+}
+
+static inline v_f32_t
+v_f32 (f32_t x)
+{
+ return (v_f32_t){x, x, x, x};
+}
+static inline v_u32_t
+v_u32 (u32_t x)
+{
+ return (v_u32_t){x, x, x, x};
+}
+static inline v_s32_t
+v_s32 (s32_t x)
+{
+ return (v_s32_t){x, x, x, x};
+}
+
+static inline f32_t
+v_get_f32 (v_f32_t x, int i)
+{
+ return x[i];
+}
+static inline u32_t
+v_get_u32 (v_u32_t x, int i)
+{
+ return x[i];
+}
+static inline s32_t
+v_get_s32 (v_s32_t x, int i)
+{
+ return x[i];
+}
+
+static inline void
+v_set_f32 (v_f32_t *x, int i, f32_t v)
+{
+ (*x)[i] = v;
+}
+static inline void
+v_set_u32 (v_u32_t *x, int i, u32_t v)
+{
+ (*x)[i] = v;
+}
+static inline void
+v_set_s32 (v_s32_t *x, int i, s32_t v)
+{
+ (*x)[i] = v;
+}
+
+/* true if any elements of a v_cond result is non-zero. */
+static inline int
+v_any_u32 (v_u32_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ /* The four 32-bit lanes are viewed as two 64-bit lanes and added together;
+ under the assumption above the sum is zero only when every lane is zero. */
+ return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
+}
+/* to wrap the result of relational operators. */
+static inline v_u32_t
+v_cond_u32 (v_u32_t x)
+{
+ return x;
+}
+static inline v_f32_t
+v_abs_f32 (v_f32_t x)
+{
+ return vabsq_f32 (x);
+}
+static inline v_u32_t
+v_bsl_u32 (v_u32_t m, v_u32_t x, v_u32_t y)
+{
+ return vbslq_u32 (m, x, y);
+}
+static inline v_u32_t
+v_cagt_f32 (v_f32_t x, v_f32_t y)
+{
+ return vcagtq_f32 (x, y);
+}
+/* to wrap |x| >= |y|. */
+static inline v_u32_t
+v_cage_f32 (v_f32_t x, v_f32_t y)
+{
+ return vcageq_f32 (x, y);
+}
+static inline v_u32_t
+v_calt_f32 (v_f32_t x, v_f32_t y)
+{
+ return vcaltq_f32 (x, y);
+}
+static inline v_f32_t
+v_div_f32 (v_f32_t x, v_f32_t y)
+{
+ return vdivq_f32 (x, y);
+}
+static inline v_f32_t
+v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
+{
+ return vfmaq_f32 (z, x, y);
+}
+static inline v_f32_t
+v_round_f32 (v_f32_t x)
+{
+ return vrndaq_f32 (x);
+}
+static inline v_s32_t
+v_round_s32 (v_f32_t x)
+{
+ return vcvtaq_s32_f32 (x);
+}
+static inline v_f32_t
+v_sel_f32 (v_u32_t p, v_f32_t x, v_f32_t y)
+{
+ return vbslq_f32 (p, x, y);
+}
+static inline v_u32_t
+v_sel_u32 (v_u32_t p, v_u32_t x, v_u32_t y)
+{
+ return vbslq_u32 (p, x, y);
+}
+static inline v_f32_t
+v_sqrt_f32 (v_f32_t x)
+{
+ return vsqrtq_f32 (x);
+}
+/* convert to type1 from type2. */
+static inline v_f32_t
+v_to_f32_s32 (v_s32_t x)
+{
+ return (v_f32_t){x[0], x[1], x[2], x[3]};
+}
+static inline v_s32_t
+v_to_s32_f32 (v_f32_t x)
+{
+ return vcvtq_s32_f32 (x);
+}
+static inline v_f32_t
+v_to_f32_u32 (v_u32_t x)
+{
+ return (v_f32_t){x[0], x[1], x[2], x[3]};
+}
+/* reinterpret as type1 from type2. */
+static inline v_u32_t
+v_as_u32_f32 (v_f32_t x)
+{
+ union { v_f32_t f; v_u32_t u; } r = {x};
+ return r.u;
+}
+/* Reinterpret the bits of a float vector as a signed 32-bit integer
+   vector.  */
+static inline v_s32_t
+v_as_s32_f32 (v_f32_t x)
+{
+  union { v_f32_t f; v_s32_t i; } pun = {x};
+  return pun.i;
+}
+static inline v_f32_t
+v_as_f32_u32 (v_u32_t x)
+{
+ union { v_u32_t u; v_f32_t f; } r = {x};
+ return r.f;
+}
+static inline v_s32_t
+v_as_s32_u32 (v_u32_t x)
+{
+ union { v_u32_t u; v_s32_t i; } r = {x};
+ return r.i;
+}
+static inline v_u32_t
+v_as_u32_s32 (v_s32_t x)
+{
+ union { v_s32_t i; v_u32_t u; } r = {x};
+ return r.u;
+}
+static inline v_f32_t
+v_lookup_f32 (const f32_t *tab, v_u32_t idx)
+{
+ return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
+}
+static inline v_u32_t
+v_lookup_u32 (const u32_t *tab, v_u32_t idx)
+{
+ return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
+}
+/* Apply the scalar function f to each lane of x whose predicate lane in p is
+   non-zero; every other lane keeps the corresponding value from y.  */
+static inline v_f32_t
+v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
+{
+  v_f32_t out = y;
+  for (int lane = 0; lane < 4; lane++)
+    if (p[lane])
+      out[lane] = f (x[lane]);
+  return out;
+}
+/* Apply the two-argument scalar function f to each lane of (x1, x2) whose
+   predicate lane in p is non-zero; every other lane keeps the corresponding
+   value from y.  */
+static inline v_f32_t
+v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
+             v_u32_t p)
+{
+  v_f32_t out = y;
+  for (int lane = 0; lane < 4; lane++)
+    if (p[lane])
+      out[lane] = f (x1[lane], x2[lane]);
+  return out;
+}
+
+static inline int
+v_lanes64 (void)
+{
+ return 2;
+}
+static inline v_f64_t
+v_f64 (f64_t x)
+{
+ return (v_f64_t){x, x};
+}
+static inline v_u64_t
+v_u64 (u64_t x)
+{
+ return (v_u64_t){x, x};
+}
+static inline v_s64_t
+v_s64 (s64_t x)
+{
+ return (v_s64_t){x, x};
+}
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+ return x[i];
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+ (*x)[i] = v;
+}
+/* true if any elements of a v_cond result is non-zero. */
+static inline int
+v_any_u64 (v_u64_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ /* Adding the two lanes gives zero only when both are zero under that
+ assumption. */
+ return vpaddd_u64 (x) != 0;
+}
+/* true if all elements of a v_cond result is non-zero (i.e. all-ones). */
+static inline int
+v_all_u64 (v_u64_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ /* Read as signed, each set lane is -1, so the pairwise sum equals -2 only
+ when both lanes are set. */
+ return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
+}
+/* to wrap the result of relational operators. */
+static inline v_u64_t
+v_cond_u64 (v_u64_t x)
+{
+ return x;
+}
+static inline v_f64_t
+v_abs_f64 (v_f64_t x)
+{
+ return vabsq_f64 (x);
+}
+static inline v_u64_t
+v_bsl_u64 (v_u64_t m, v_u64_t x, v_u64_t y)
+{
+ return vbslq_u64 (m, x, y);
+}
+static inline v_u64_t
+v_cagt_f64 (v_f64_t x, v_f64_t y)
+{
+ return vcagtq_f64 (x, y);
+}
+static inline v_f64_t
+v_div_f64 (v_f64_t x, v_f64_t y)
+{
+ return vdivq_f64 (x, y);
+}
+static inline v_f64_t
+v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
+{
+ return vfmaq_f64 (z, x, y);
+}
+static inline v_f64_t
+v_min_f64(v_f64_t x, v_f64_t y) {
+ return vminq_f64(x, y);
+}
+static inline v_f64_t
+v_round_f64 (v_f64_t x)
+{
+ return vrndaq_f64 (x);
+}
+static inline v_f64_t
+v_sel_f64 (v_u64_t p, v_f64_t x, v_f64_t y)
+{
+ return vbslq_f64 (p, x, y);
+}
+static inline v_f64_t
+v_sqrt_f64 (v_f64_t x)
+{
+ return vsqrtq_f64 (x);
+}
+static inline v_s64_t
+v_round_s64 (v_f64_t x)
+{
+ return vcvtaq_s64_f64 (x);
+}
+static inline v_u64_t
+v_trunc_u64 (v_f64_t x)
+{
+ return vcvtq_u64_f64 (x);
+}
+/* convert to type1 from type2. */
+static inline v_f64_t
+v_to_f64_s64 (v_s64_t x)
+{
+ return (v_f64_t){x[0], x[1]};
+}
+static inline v_f64_t
+v_to_f64_u64 (v_u64_t x)
+{
+ return (v_f64_t){x[0], x[1]};
+}
+static inline v_s64_t
+v_to_s64_f64 (v_f64_t x)
+{
+ return vcvtq_s64_f64 (x);
+}
+/* reinterpret as type1 from type2. */
+static inline v_u64_t
+v_as_u64_f64 (v_f64_t x)
+{
+ union { v_f64_t f; v_u64_t u; } r = {x};
+ return r.u;
+}
+static inline v_f64_t
+v_as_f64_u64 (v_u64_t x)
+{
+ union { v_u64_t u; v_f64_t f; } r = {x};
+ return r.f;
+}
+static inline v_s64_t
+v_as_s64_u64 (v_u64_t x)
+{
+ union { v_u64_t u; v_s64_t i; } r = {x};
+ return r.i;
+}
+static inline v_u64_t
+v_as_u64_s64 (v_s64_t x)
+{
+ union { v_s64_t i; v_u64_t u; } r = {x};
+ return r.u;
+}
+static inline v_f64_t
+v_lookup_f64 (const f64_t *tab, v_u64_t idx)
+{
+ return (v_f64_t){tab[idx[0]], tab[idx[1]]};
+}
+static inline v_u64_t
+v_lookup_u64 (const u64_t *tab, v_u64_t idx)
+{
+ return (v_u64_t){tab[idx[0]], tab[idx[1]]};
+}
+/* Apply the scalar function f to each lane of x whose predicate lane in p is
+   non-zero; every other lane keeps the corresponding value from y.  */
+static inline v_f64_t
+v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
+{
+  v_f64_t out = y;
+  for (int lane = 0; lane < 2; lane++)
+    if (p[lane])
+      out[lane] = f (x[lane]);
+  return out;
+}
+/* Apply the two-argument scalar function f to each lane of (x1, x2) whose
+   predicate lane in p is non-zero; every other lane keeps the corresponding
+   value from y.  */
+static inline v_f64_t
+v_call2_f64 (f64_t (*f) (f64_t, f64_t), v_f64_t x1, v_f64_t x2, v_f64_t y,
+             v_u64_t p)
+{
+  v_f64_t out = y;
+  for (int lane = 0; lane < 2; lane++)
+    if (p[lane])
+      out[lane] = f (x1[lane], x2[lane]);
+  return out;
+}
+#endif
+
+#endif
+#endif
diff --git a/pl/math/v_sinh_3u.c b/pl/math/v_sinh_3u.c
new file mode 100644
index 0000000..57ec66e
--- /dev/null
+++ b/pl/math/v_sinh_3u.c
@@ -0,0 +1,94 @@
+/*
+ * Double-precision vector sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define BigBound \
+ 0x4080000000000000 /* 2^9. expm1 helper overflows for large input. */
+#define TinyBound \
+ 0x3e50000000000000 /* 2^-26, below which sinh(x) rounds to x. */
+#define InvLn2 v_f64 (0x1.71547652b82fep0)
+#define MLn2hi v_f64 (-0x1.62e42fefa39efp-1)
+#define MLn2lo v_f64 (-0x1.abc9e3b39803fp-56)
+#define Shift v_f64 (0x1.8p52)
+#define One 0x3ff0000000000000
+#define C(i) v_f64 (__expm1_poly[i])
+
+#if V_SUPPORTED
+
+/* Inline helper computing exp(x) - 1 for every lane of x, without any
+   special-case handling of its own.  */
+static inline v_f64_t
+expm1_inline (v_f64_t x)
+{
+  /* Reduce argument:
+     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+     where i = round(x / ln2)
+     and   f = x - i * ln2 (f in [-ln2/2, ln2/2]).  */
+  v_f64_t j = v_fma_f64 (InvLn2, x, Shift) - Shift;
+  v_s64_t i = v_to_s64_f64 (j);
+  v_f64_t f = v_fma_f64 (j, MLn2hi, x);
+  f = v_fma_f64 (j, MLn2lo, f);
+  /* Approximate expm1(f) using polynomial.  */
+  v_f64_t f2 = f * f, f4 = f2 * f2, f8 = f4 * f4;
+  v_f64_t p = v_fma_f64 (f2, ESTRIN_10 (f, f2, f4, f8, C), f);
+  /* t = 2^i.  The exponent is moved into place in the unsigned domain:
+     left-shifting a negative signed integer is undefined behaviour in C, and
+     i would be negative for any negative argument.  */
+  v_f64_t t = v_as_f64_u64 ((v_as_u64_s64 (i) << 52) + One);
+  /* expm1(x) ~= p * t + (t - 1).  */
+  return v_fma_f64 (p, t, t - 1);
+}
+
+/* Slow path: evaluate the scalar sinh on every lane of x.  */
+static NOINLINE VPCS_ATTR v_f64_t
+special_case (v_f64_t x)
+{
+  /* The predicate is all-ones, so the scalar routine is used for each lane
+     and the fallback argument (x again) is never selected.  */
+  v_u64_t every_lane = v_u64 (-1);
+  return v_call_f64 (sinh, x, x, every_lane);
+}
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+   sinh(x) = (exp(x) - exp(-x)) / 2.
+   The greatest observed error is 2.57 ULP:
+   sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+                              want 0x1.ab34e59d678d9p-2.  */
+VPCS_ATTR v_f64_t V_NAME (sinh) (v_f64_t x)
+{
+  v_u64_t bits = v_as_u64_f64 (x);
+  v_u64_t abs_bits = bits & AbsMask;
+  v_u64_t sign_bit = bits & ~AbsMask;
+
+#if WANT_SIMD_EXCEPT
+  /* One unsigned comparison flags |x| below TinyBound as well as |x| at or
+     above BigBound.  */
+  v_u64_t special
+    = v_cond_u64 ((abs_bits - TinyBound) >= (BigBound - TinyBound));
+#else
+  v_u64_t special = v_cond_u64 (abs_bits >= BigBound);
+#endif
+
+  /* Fall back to scalar variant for all lanes if any of them are special.  */
+  if (unlikely (v_any_u64 (special)))
+    return special_case (x);
+
+  /* 0.5 carrying the sign of x.  */
+  v_f64_t halfsign = v_as_f64_u64 (sign_bit | Half);
+
+  /* Up to the point that expm1 overflows, we can use it to calculate sinh
+     using a slight rearrangement of the definition of sinh.  With
+     t = expm1(|x|), sinh(|x|) = (t + t / (t + 1)) / 2.  This allows us to
+     retain acceptable accuracy for very small inputs.  */
+  v_f64_t t = expm1_inline (v_as_f64_u64 (abs_bits));
+  return (t + t / (t + 1)) * halfsign;
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (sinh), 2.08)
+PL_TEST_EXPECT_FENV (V_NAME (sinh), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (sinh), 0, TinyBound, 1000)
+PL_TEST_INTERVAL (V_NAME (sinh), -0, -TinyBound, 1000)
+PL_TEST_INTERVAL (V_NAME (sinh), TinyBound, BigBound, 500000)
+PL_TEST_INTERVAL (V_NAME (sinh), -TinyBound, -BigBound, 500000)
+PL_TEST_INTERVAL (V_NAME (sinh), BigBound, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (sinh), -BigBound, -inf, 1000)
+#endif
diff --git a/pl/math/v_sinhf_2u3.c b/pl/math/v_sinhf_2u3.c
new file mode 100644
index 0000000..49cf078
--- /dev/null
+++ b/pl/math/v_sinhf_2u3.c
@@ -0,0 +1,69 @@
+/*
+ * Single-precision vector sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "v_expm1f_inline.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define BigBound \
+ 0x42b0c0a7 /* 0x1.61814ep+6, above which expm1f helper overflows. */
+#define TinyBound \
+ 0x2fb504f4 /* 0x1.6a09e8p-32, below which expm1f underflows. */
+
+/* Slow path: evaluate the scalar sinhf on every lane of x.  */
+static NOINLINE VPCS_ATTR v_f32_t
+special_case (v_f32_t x)
+{
+  /* The predicate is all-ones, so the scalar routine is used for each lane
+     and the fallback argument (x again) is never selected.  */
+  v_u32_t every_lane = v_u32 (-1);
+  return v_call_f32 (sinhf, x, x, every_lane);
+}
+
+/* Approximation for vector single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ __v_sinhf(0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. */
+VPCS_ATTR v_f32_t V_NAME (sinhf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask;
+ v_f32_t ax = v_as_f32_u32 (iax);
+ v_u32_t sign = ix & ~AbsMask;
+ /* 0.5 carrying the sign of x. */
+ v_f32_t halfsign = v_as_f32_u32 (sign | Half);
+
+#if WANT_SIMD_EXCEPT
+ /* One unsigned comparison flags |x| below TinyBound as well as |x| at or
+ above BigBound. */
+ v_u32_t special = v_cond_u32 ((iax - TinyBound) >= (BigBound - TinyBound));
+#else
+ v_u32_t special = v_cond_u32 (iax >= BigBound);
+#endif
+
+ /* Fall back to the scalar variant for all lanes if any of them should trigger
+ an exception. */
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x);
+
+ /* Up to the point that expm1f overflows, we can use it to calculate sinhf
+ using a slight rearrangement of the definition of sinh. With
+ t = expm1f(|x|), sinh(|x|) = (t + t / (t + 1)) / 2. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ v_f32_t t = expm1f_inline (ax);
+ return (t + t / (t + 1)) * halfsign;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (sinhf), 1.76)
+PL_TEST_EXPECT_FENV (V_NAME (sinhf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (sinhf), 0, TinyBound, 1000)
+PL_TEST_INTERVAL (V_NAME (sinhf), -0, -TinyBound, 1000)
+PL_TEST_INTERVAL (V_NAME (sinhf), TinyBound, BigBound, 100000)
+PL_TEST_INTERVAL (V_NAME (sinhf), -TinyBound, -BigBound, 100000)
+PL_TEST_INTERVAL (V_NAME (sinhf), BigBound, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (sinhf), -BigBound, -inf, 1000)
+#endif
diff --git a/pl/math/v_tan_3u5.c b/pl/math/v_tan_3u5.c
new file mode 100644
index 0000000..f87bacc
--- /dev/null
+++ b/pl/math/v_tan_3u5.c
@@ -0,0 +1,102 @@
+/*
+ * Double-precision vector tan(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrin.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define MHalfPiHi v_f64 (__v_tan_data.neg_half_pi_hi)
+#define MHalfPiLo v_f64 (__v_tan_data.neg_half_pi_lo)
+#define TwoOverPi v_f64 (0x1.45f306dc9c883p-1)
+#define Shift v_f64 (0x1.8p52)
+#define AbsMask 0x7fffffffffffffff
+#define RangeVal 0x4160000000000000 /* asuint64(2^23). */
+#define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */
+#define C(i) v_f64 (__v_tan_data.poly[i])
+
+/* Special cases: fall back to the scalar tan for every lane of x.  */
+VPCS_ATTR
+NOINLINE static v_f64_t
+specialcase (v_f64_t x)
+{
+  /* The predicate is all-ones, so the fallback argument (x again) is never
+     selected.  */
+  v_u64_t every_lane = v_u64 (-1);
+  return v_call_f64 (tan, x, x, every_lane);
+}
+
+/* Vector approximation for double-precision tan.
+ Maximum measured error is 3.48 ULP:
+ __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+ want -0x1.f6ccd8ecf7deap+37. */
+VPCS_ATTR
+v_f64_t V_NAME (tan) (v_f64_t x)
+{
+ v_u64_t iax = v_as_u64_f64 (x) & AbsMask;
+
+ /* Our argument reduction cannot calculate q with sufficient accuracy for very
+ large inputs. Fall back to scalar routine for all lanes if any are too
+ large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny
+ input to avoid underflow. Note pl does not supply a scalar double-precision
+ tan, so the fallback will be statically linked from the system libm. */
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (iax - TinyBound > RangeVal - TinyBound)))
+#else
+ if (unlikely (v_any_u64 (iax > RangeVal)))
+#endif
+ return specialcase (x);
+
+ /* q = nearest integer to 2 * x / pi. */
+ v_f64_t q = v_fma_f64 (x, TwoOverPi, Shift) - Shift;
+ v_s64_t qi = v_to_s64_f64 (q);
+
+ /* Use q to reduce x to r in [-pi/4, pi/4], by:
+ r = x - q * pi/2, in extended precision. */
+ v_f64_t r = x;
+ r = v_fma_f64 (q, MHalfPiHi, r);
+ r = v_fma_f64 (q, MHalfPiLo, r);
+ /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+ formula. */
+ r = r * 0.5;
+
+ /* Approximate tan(r) using order 8 polynomial.
+ tan(x) is odd, so polynomial has the form:
+ tan(x) ~= x + C0 * x^3 + C1 * x^5 + C2 * x^7 + ...
+ Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
+ Then compute the approximation by:
+ tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */
+ v_f64_t r2 = r * r, r4 = r2 * r2, r8 = r4 * r4;
+ /* Use offset version of Estrin wrapper to evaluate from C1 onwards. */
+ v_f64_t p = ESTRIN_7_ (r2, r4, r8, C, 1);
+ p = v_fma_f64 (p, r2, C (0));
+ p = v_fma_f64 (r2, p * r, r);
+
+ /* Recombination uses double-angle formula:
+ tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
+ and reciprocity around pi/2:
+ tan(x) = 1 / (tan(pi/2 - x))
+ to assemble result using change-of-sign and conditional selection of
+ numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */
+ /* n = p^2 - 1 and d = 2 * p, so the double-angle value is -d / n and its
+ negative reciprocal is n / d. */
+ v_f64_t n = v_fma_f64 (p, p, v_f64 (-1));
+ v_f64_t d = p * 2;
+
+ /* Set when the low bit of q is clear (q even): those lanes return -d / n,
+ the remaining lanes return n / d. */
+ v_u64_t use_recip = v_cond_u64 ((v_as_u64_s64 (qi) & 1) == 0);
+
+ return v_sel_f64 (use_recip, -d, n) / v_sel_f64 (use_recip, n, d);
+}
+VPCS_ALIAS
+
+PL_SIG (V, D, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (V_NAME (tan), 2.99)
+PL_TEST_EXPECT_FENV (V_NAME (tan), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (tan), 0, TinyBound, 5000)
+PL_TEST_INTERVAL (V_NAME (tan), TinyBound, RangeVal, 100000)
+PL_TEST_INTERVAL (V_NAME (tan), RangeVal, inf, 5000)
+PL_TEST_INTERVAL (V_NAME (tan), -0, -TinyBound, 5000)
+PL_TEST_INTERVAL (V_NAME (tan), -TinyBound, -RangeVal, 100000)
+PL_TEST_INTERVAL (V_NAME (tan), -RangeVal, -inf, 5000)
+#endif
diff --git a/pl/math/v_tan_data.c b/pl/math/v_tan_data.c
new file mode 100644
index 0000000..04e2516
--- /dev/null
+++ b/pl/math/v_tan_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients and helpers for double-precision vector tan(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+
+/* Constants for the vector tan routine: -pi/2 split into a high part and a
+ low correction term, followed by nine polynomial coefficients (the first
+ is approximately 1/3). */
+const struct v_tan_data __v_tan_data
+ = {.neg_half_pi_hi = -0x1.921fb54442d18p0,
+ .neg_half_pi_lo = -0x1.1a62633145c07p-54,
+ .poly
+ = {0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5,
+ 0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9,
+ 0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11, 0x1.4e4fd14147622p-12}};
diff --git a/pl/math/v_tanf_3u5.c b/pl/math/v_tanf_3u5.c
new file mode 100644
index 0000000..828466b
--- /dev/null
+++ b/pl/math/v_tanf_3u5.c
@@ -0,0 +1,131 @@
+/*
+ * Single-precision vector tan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrinf.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+/* Constants. */
+#define NegPio2_1 (v_f32 (-0x1.921fb6p+0f))
+#define NegPio2_2 (v_f32 (0x1.777a5cp-25f))
+#define NegPio2_3 (v_f32 (0x1.ee59dap-50f))
+#define InvPio2 (v_f32 (0x1.45f306p-1f))
+#define RangeVal (0x47000000) /* asuint32(0x1p15f). */
+#define TinyBound (0x30000000) /* asuint32 (0x1p-31). */
+#define Shift (v_f32 (0x1.8p+23f))
+#define AbsMask (v_u32 (0x7fffffff))
+
+#define poly(i) v_f32 (__tanf_poly_data.poly_tan[i])
+
+/* Special cases: lanes selected by cmp are recomputed with the scalar tanf
+   applied to x; the remaining lanes keep the value already in y.  */
+VPCS_ATTR
+NOINLINE static v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
+{
+  v_f32_t patched = v_call_f32 (tanf, x, y, cmp);
+  return patched;
+}
+
+/* Evaluate the tanf polynomial at z using a full Estrin scheme.  */
+static inline v_f32_t
+eval_poly (v_f32_t z)
+{
+  v_f32_t sq = z * z;
+#if WANT_SIMD_EXCEPT
+  /* Tiny z (<= 0x1p-31) will underflow when calculating z^4.  If fp
+     exceptions are to be triggered correctly, sidestep this by forcing the
+     square to 0 in such lanes.  */
+  v_u32_t abs_bits = v_as_u32_f32 (z) & AbsMask;
+  v_u32_t tiny = v_cond_u32 (abs_bits <= TinyBound);
+  if (unlikely (v_any_u32 (tiny)))
+    sq = v_sel_f32 (tiny, v_f32 (0), sq);
+#endif
+  v_f32_t quad = sq * sq;
+  return ESTRIN_5 (z, sq, quad, poly);
+}
+
+/* Fast implementation of Neon tanf.
+ Maximum error is 3.45 ULP:
+ __v_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1
+ want 0x1.ff9850p-1. */
+VPCS_ATTR
+v_f32_t V_NAME (tanf) (v_f32_t x)
+{
+ /* Keep the unmodified input: x may be overwritten in special lanes below,
+ but the scalar fallback must see the original values. */
+ v_f32_t special_arg = x;
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask;
+
+ /* iax >= RangeVal means x, if not inf or NaN, is too large to perform fast
+ regression. */
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, also special-case tiny
+ input, as this will lead to overflow later. Fix any special lanes to 1 to
+ prevent any exceptions being triggered. */
+ v_u32_t special = v_cond_u32 (iax - TinyBound >= RangeVal - TinyBound);
+ if (unlikely (v_any_u32 (special)))
+ x = v_sel_f32 (special, v_f32 (1.0f), x);
+#else
+ /* Otherwise, special-case large and special values. */
+ v_u32_t special = v_cond_u32 (iax >= RangeVal);
+#endif
+
+ /* n = rint(x/(pi/2)). */
+ v_f32_t q = v_fma_f32 (InvPio2, x, Shift);
+ v_f32_t n = q - Shift;
+ /* n is representable as a signed integer, simply convert it. */
+ v_s32_t in = v_round_s32 (n);
+ /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */
+ v_s32_t alt = in & 1;
+ v_u32_t pred_alt = (alt != 0);
+
+ /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). */
+ v_f32_t r;
+ r = v_fma_f32 (NegPio2_1, n, x);
+ r = v_fma_f32 (NegPio2_2, n, r);
+ r = v_fma_f32 (NegPio2_3, n, r);
+
+ /* If x lives in an interval, where |tan(x)|
+ - is finite, then use a polynomial approximation of the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity then use symmetries of tangent and the identity
+ tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use
+ the same polynomial approximation of tan as above. */
+
+ /* Perform additional reduction if required. */
+ v_f32_t z = v_sel_f32 (pred_alt, -r, r);
+
+ /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4]. */
+ v_f32_t z2 = r * r;
+ v_f32_t p = eval_poly (z2);
+ v_f32_t y = v_fma_f32 (z * z2, p, z);
+
+ /* Compute reciprocal and apply if required. */
+ /* The division is performed for every lane; only lanes with pred_alt set
+ keep its result. */
+ v_f32_t inv_y = v_div_f32 (v_f32 (1.0f), y);
+ y = v_sel_f32 (pred_alt, inv_y, y);
+
+ /* Fast reduction does not handle the x = -0.0 case well,
+ therefore it is fixed here. */
+ y = v_sel_f32 (x == v_f32 (-0.0), x, y);
+
+ if (unlikely (v_any_u32 (special)))
+ return specialcase (special_arg, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (V_NAME (tanf), 2.96)
+PL_TEST_EXPECT_FENV (V_NAME (tanf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (tanf), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (V_NAME (tanf), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME (tanf), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME (tanf), 0x1p-23, 0.7, 50000)
+PL_TEST_INTERVAL (V_NAME (tanf), 0.7, 1.5, 50000)
+PL_TEST_INTERVAL (V_NAME (tanf), 1.5, 100, 50000)
+PL_TEST_INTERVAL (V_NAME (tanf), 100, 0x1p17, 50000)
+PL_TEST_INTERVAL (V_NAME (tanf), 0x1p17, inf, 50000)
+#endif
diff --git a/pl/math/v_tanh_3u.c b/pl/math/v_tanh_3u.c
new file mode 100644
index 0000000..c8b6c25
--- /dev/null
+++ b/pl/math/v_tanh_3u.c
@@ -0,0 +1,94 @@
+/*
+ * Double-precision vector tanh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "estrin.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#define AbsMask v_u64 (0x7fffffffffffffff) /* Every bit except the sign bit. */
+#define InvLn2 v_f64 (0x1.71547652b82fep0) /* 1 / ln(2). */
+#define MLn2hi v_f64 (-0x1.62e42fefa39efp-1) /* -ln(2), leading part. */
+#define MLn2lo v_f64 (-0x1.abc9e3b39803fp-56) /* -ln(2), trailing correction. */
+#define Shift v_f64 (0x1.8p52) /* 1.5 * 2^52, added then subtracted in
+ expm1_inline to obtain an integral value. */
+#define C(i) v_f64 (__expm1_poly[i]) /* i-th entry of __expm1_poly, wrapped by
+ v_f64. */
+
+#define BoringBound 0x403241bf835f9d5f /* asuint64 (0x1.241bf835f9d5fp+4). */
+#define TinyBound 0x3e40000000000000 /* asuint64 (0x1p-27). */
+#define One v_u64 (0x3ff0000000000000) /* asuint64 (1.0). */
+
+static inline v_f64_t
+expm1_inline (v_f64_t x)
+{
+ /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
+ the scalar variant of tanh.
+ The scheme is: write x = i * ln2 + f, approximate expm1(f) with a
+ polynomial p, then rebuild expm1(x) = 2^i * (p + 1) - 1.
+ No special-case handling is done here; the caller is expected to pass
+ only values for which the reduction below is valid. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.
+ j is x / ln2 brought to an integral value by adding and subtracting Shift;
+ f = x - j * ln2 is formed in two fma steps, using the hi and lo parts of
+ -ln2 in turn. */
+ v_f64_t j = v_fma_f64 (InvLn2, x, Shift) - Shift;
+ v_s64_t i = v_to_s64_f64 (j);
+ v_f64_t f = v_fma_f64 (j, MLn2hi, x);
+ f = v_fma_f64 (j, MLn2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ p = f + f^2 * P(f), with the coefficients of P taken from __expm1_poly via
+ C. ESTRIN_10 comes from estrin.h; presumably it evaluates P by Estrin's
+ scheme from the precomputed powers f, f^2, f^4 and f^8 - TODO confirm. */
+ v_f64_t f2 = f * f;
+ v_f64_t f4 = f2 * f2;
+ v_f64_t p = v_fma_f64 (f2, ESTRIN_10 (f, f2, f4, f4 * f4, C), f);
+
+ /* t = 2 ^ i.
+ Built directly in the bit pattern: i is shifted into the exponent field
+ and added to the bit pattern of 1.0. */
+ v_f64_t t = v_as_f64_u64 (v_as_u64_s64 (i << 52) + One);
+ /* expm1(x) = p * t + (t - 1). */
+ return v_fma_f64 (p, t, t - 1);
+}
+
+/* Out-of-line fix-up path: hands the input x, the vector-path result y and
+ the lane mask to v_call_f64 together with the scalar tanh routine. */
+static NOINLINE v_f64_t
+special_case (v_f64_t x, v_f64_t y, v_u64_t special)
+{
+ v_f64_t fixed = v_call_f64 (tanh, x, y, special);
+ return fixed;
+}
+
+/* Vector approximation for double-precision tanh(x), using a simplified
+ version of expm1. The greatest observed error is 2.75 ULP:
+ __v_tanh(-0x1.c143c3a44e087p-3) got -0x1.ba31ba4691ab7p-3
+ want -0x1.ba31ba4691ab4p-3.
+
+ Lanes whose |x| lies outside [0x1p-27, 0x1.241bf835f9d5fp+4] (the TinyBound
+ and BoringBound bit patterns), which includes infinities and NaNs, are
+ handed to special_case; all other lanes return q / (q + 2) with
+ q = expm1 (2x). */
+VPCS_ATTR v_f64_t V_NAME (tanh) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t ia = ix & AbsMask; /* Bit pattern of |x|. */
+
+ /* Trigger special-cases for tiny, boring and infinity/NaN.
+ A single unsigned comparison covers both ends: for ia < TinyBound the
+ subtraction wraps around to a huge value, and for ia > BoringBound the
+ difference exceeds the width of the accepted range. */
+ v_u64_t special = v_cond_u64 ((ia - TinyBound) > (BoringBound - TinyBound));
+ v_f64_t u;
+
+ /* To trigger fp exceptions correctly, set special lanes to a neutral value.
+ They will be fixed up later by the special-case handler.
+ The neutral value is 1, so u is 2 in those lanes; the select is skipped
+ entirely when no lane is special. */
+ if (unlikely (v_any_u64 (special)))
+ u = v_sel_f64 (special, v_f64 (1), x) * 2;
+ else
+ u = x * 2;
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1).
+ With q = e^2x - 1 the denominator is q + 2. */
+ v_f64_t q = expm1_inline (u);
+ v_f64_t y = q / (q + 2);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+/* Signature, ULP threshold and test intervals for the double-precision
+ vector tanh. Interval bounds are floating-point values: 0x1p-27 is the tiny
+ threshold and 0x1.241bf835f9d5fp+4 the boring threshold. TinyBound and
+ BoringBound hold the integer bit patterns of those values, so they must
+ not be used as bounds here. */
+PL_SIG (V, D, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (tanh), 2.26)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (tanh))
+PL_TEST_INTERVAL (V_NAME (tanh), 0, 0x1p-27, 1000)
+PL_TEST_INTERVAL (V_NAME (tanh), -0, -0x1p-27, 1000)
+PL_TEST_INTERVAL (V_NAME (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 100000)
+PL_TEST_INTERVAL (V_NAME (tanh), -0x1p-27, -0x1.241bf835f9d5fp+4, 100000)
+PL_TEST_INTERVAL (V_NAME (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
+PL_TEST_INTERVAL (V_NAME (tanh), -0x1.241bf835f9d5fp+4, -inf, 1000)
+#endif
diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c
new file mode 100644
index 0000000..3616611
--- /dev/null
+++ b/pl/math/v_tanhf_2u6.c
@@ -0,0 +1,69 @@
+/*
+ * Single-precision vector tanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if V_SUPPORTED
+
+#include "v_expm1f_inline.h"
+
+#define BoringBound \
+ 0x41102cb3 /* asuint (0x1.205966p+3); above this |x|, tanhf rounds to 1 \
+ (or -1 for negative x). */
+#define AbsMask 0x7fffffff /* Every bit except the sign bit. */
+
+/* Out-of-line fix-up path: hands the input x, the vector-path result y and
+ the lane mask to v_call_f32 together with the scalar tanhf routine. */
+static NOINLINE v_f32_t
+special_case (v_f32_t x, v_f32_t y, v_u32_t special)
+{
+ v_f32_t fixed = v_call_f32 (tanhf, x, y, special);
+ return fixed;
+}
+
+/* Approximation for single-precision vector tanh(x), using a simplified version
+ of expm1f. The maximum error is 2.58 ULP:
+ __v_tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5
+ want 0x1.f9ba08p-5.
+
+ Lanes with |x| above BoringBound take the value 1 carrying the sign of x;
+ lanes flagged as special are handed to special_case; all other lanes
+ return q / (q + 2) with q = expm1f (2x). */
+VPCS_ATTR v_f32_t V_NAME (tanhf) (v_f32_t x)
+{
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask; /* Bit pattern of |x|. */
+ v_u32_t sign = ix & ~AbsMask; /* Sign bit of x only. */
+ v_u32_t is_boring = v_cond_u32 (iax > BoringBound);
+ v_f32_t boring = v_as_f32_u32 (sign | One); /* 1 with the sign of x. */
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered properly, set all special and boring
+ lanes to 1, which will trigger no exceptions, and fix them up later.
+ Here special means NaN (iax above the bit pattern of infinity) or
+ |x| < 0x1p-23 (0x34000000 is its bit pattern). */
+ v_u32_t special = v_cond_u32 ((iax > 0x7f800000) | (iax < 0x34000000));
+ ix = v_sel_u32 (is_boring, v_u32 (One), ix);
+ if (unlikely (v_any_u32 (special)))
+ ix = v_sel_u32 (special, v_u32 (One), ix);
+#else
+ v_u32_t special = v_cond_u32 ((iax > 0x7f800000) | (iax == 0)); /* NaN or
+ +/-0. */
+#endif
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1).
+ With q = e^2x - 1 the denominator is q + 2. Boring lanes are overwritten
+ with +/-1 before any special-case fix-up. */
+ v_f32_t q = expm1f_inline (2 * v_as_f32_u32 (ix));
+ v_f32_t y = q / (q + 2);
+ y = v_sel_f32 (is_boring, boring, y);
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, y, special);
+ return y;
+}
+VPCS_ALIAS
+
+PL_SIG (V, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME (tanhf), 2.09)
+PL_TEST_EXPECT_FENV (V_NAME (tanhf), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (tanhf), 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (V_NAME (tanhf), -0, -0x1p-23, 1000)
+PL_TEST_INTERVAL (V_NAME (tanhf), 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (V_NAME (tanhf), -0x1p-23, -0x1.205966p+3, 100000)
+PL_TEST_INTERVAL (V_NAME (tanhf), 0x1.205966p+3, inf, 100)
+PL_TEST_INTERVAL (V_NAME (tanhf), -0x1.205966p+3, -inf, 100)
+#endif
diff --git a/pl/math/vn_acosh_3u5.c b/pl/math/vn_acosh_3u5.c
new file mode 100644
index 0000000..649735b
--- /dev/null
+++ b/pl/math/vn_acosh_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_acosh.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_acosh, _ZGVnN2v_acosh)
+#include "v_acosh_3u5.c"
+#endif
diff --git a/pl/math/vn_acoshf_3u1.c b/pl/math/vn_acoshf_3u1.c
new file mode 100644
index 0000000..8c5f106
--- /dev/null
+++ b/pl/math/vn_acoshf_3u1.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_acoshf.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_acoshf, _ZGVnN4v_acoshf)
+#include "v_acoshf_3u1.c"
+#endif
diff --git a/pl/math/vn_asinh_3u5.c b/pl/math/vn_asinh_3u5.c
new file mode 100644
index 0000000..0d2373b
--- /dev/null
+++ b/pl/math/vn_asinh_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_asinh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_asinh, _ZGVnN2v_asinh)
+#include "v_asinh_3u5.c"
+#endif
diff --git a/pl/math/vn_asinhf_2u7.c b/pl/math/vn_asinhf_2u7.c
new file mode 100644
index 0000000..6c8927f
--- /dev/null
+++ b/pl/math/vn_asinhf_2u7.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_asinhf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_asinhf, _ZGVnN4v_asinhf)
+#include "v_asinhf_2u7.c"
+#endif
diff --git a/pl/math/vn_atan2_3u.c b/pl/math/vn_atan2_3u.c
new file mode 100644
index 0000000..925b5b4
--- /dev/null
+++ b/pl/math/vn_atan2_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atan2.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atan2, _ZGVnN2vv_atan2)
+#include "v_atan2_3u.c"
+#endif
diff --git a/pl/math/vn_atan2f_3u.c b/pl/math/vn_atan2f_3u.c
new file mode 100644
index 0000000..51d33d5
--- /dev/null
+++ b/pl/math/vn_atan2f_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atan2f.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atan2f, _ZGVnN4vv_atan2f)
+#include "v_atan2f_3u.c"
+#endif
diff --git a/pl/math/vn_atan_2u5.c b/pl/math/vn_atan_2u5.c
new file mode 100644
index 0000000..ccebce2
--- /dev/null
+++ b/pl/math/vn_atan_2u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atan.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atan, _ZGVnN2v_atan)
+#include "v_atan_2u5.c"
+#endif
diff --git a/pl/math/vn_atanf_3u.c b/pl/math/vn_atanf_3u.c
new file mode 100644
index 0000000..b879727
--- /dev/null
+++ b/pl/math/vn_atanf_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atanf.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atanf, _ZGVnN4v_atanf)
+#include "v_atanf_3u.c"
+#endif
diff --git a/pl/math/vn_atanh_3u5.c b/pl/math/vn_atanh_3u5.c
new file mode 100644
index 0000000..19429b2
--- /dev/null
+++ b/pl/math/vn_atanh_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atanh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atanh, _ZGVnN2v_atanh)
+#include "v_atanh_3u5.c"
+#endif
diff --git a/pl/math/vn_atanhf_3u1.c b/pl/math/vn_atanhf_3u1.c
new file mode 100644
index 0000000..7de226d
--- /dev/null
+++ b/pl/math/vn_atanhf_3u1.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_atanhf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_atanhf, _ZGVnN4v_atanhf)
+#include "v_atanhf_3u1.c"
+#endif
diff --git a/pl/math/vn_cbrt_2u.c b/pl/math/vn_cbrt_2u.c
new file mode 100644
index 0000000..4cb0dc8
--- /dev/null
+++ b/pl/math/vn_cbrt_2u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_cbrt.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_cbrt, _ZGVnN2v_cbrt)
+#include "v_cbrt_2u.c"
+#endif
diff --git a/pl/math/vn_cbrtf_1u5.c b/pl/math/vn_cbrtf_1u5.c
new file mode 100644
index 0000000..40a72d8
--- /dev/null
+++ b/pl/math/vn_cbrtf_1u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_cbrtf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_cbrtf, _ZGVnN4v_cbrtf)
+#include "v_cbrtf_1u5.c"
+#endif
diff --git a/pl/math/vn_cosh_2u.c b/pl/math/vn_cosh_2u.c
new file mode 100644
index 0000000..9bf7f02
--- /dev/null
+++ b/pl/math/vn_cosh_2u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_cosh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_cosh, _ZGVnN2v_cosh)
+#include "v_cosh_2u.c"
+#endif
diff --git a/pl/math/vn_coshf_2u4.c b/pl/math/vn_coshf_2u4.c
new file mode 100644
index 0000000..b149cb3
--- /dev/null
+++ b/pl/math/vn_coshf_2u4.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_coshf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_coshf, _ZGVnN4v_coshf)
+#include "v_coshf_2u4.c"
+#endif
diff --git a/pl/math/vn_erf_2u.c b/pl/math/vn_erf_2u.c
new file mode 100644
index 0000000..95bd141
--- /dev/null
+++ b/pl/math/vn_erf_2u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_erf.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_erf, _ZGVnN2v_erf)
+#include "v_erf_2u.c"
+#endif
diff --git a/pl/math/vn_erfc_4u.c b/pl/math/vn_erfc_4u.c
new file mode 100644
index 0000000..1cf6546
--- /dev/null
+++ b/pl/math/vn_erfc_4u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_erfc.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_erfc, _ZGVnN2v_erfc)
+#include "v_erfc_4u.c"
+#endif
diff --git a/pl/math/vn_erfcf_1u.c b/pl/math/vn_erfcf_1u.c
new file mode 100644
index 0000000..ef5a21d
--- /dev/null
+++ b/pl/math/vn_erfcf_1u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_erfcf.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_erfcf, _ZGVnN4v_erfcf)
+#include "v_erfcf_1u.c"
+#endif
diff --git a/pl/math/vn_erff_1u5.c b/pl/math/vn_erff_1u5.c
new file mode 100644
index 0000000..ee8848e
--- /dev/null
+++ b/pl/math/vn_erff_1u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_erff.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_erff, _ZGVnN4v_erff)
+#include "v_erff_1u5.c"
+#endif
diff --git a/pl/math/vn_exp_tail.c b/pl/math/vn_exp_tail.c
new file mode 100644
index 0000000..52a57fe
--- /dev/null
+++ b/pl/math/vn_exp_tail.c
@@ -0,0 +1,11 @@
+/*
+ * AdvSIMD vector PCS variant of __v_exp_tail.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#include "v_exp_tail.c"
+#endif
diff --git a/pl/math/vn_expf.c b/pl/math/vn_expf.c
new file mode 100644
index 0000000..83e7f0a
--- /dev/null
+++ b/pl/math/vn_expf.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_expf.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_expf, _ZGVnN4v_expf)
+#include "v_expf.c"
+#endif
diff --git a/pl/math/vn_expm1_2u5.c b/pl/math/vn_expm1_2u5.c
new file mode 100644
index 0000000..35111e2
--- /dev/null
+++ b/pl/math/vn_expm1_2u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_expm1.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_expm1, _ZGVnN2v_expm1)
+#include "v_expm1_2u5.c"
+#endif
diff --git a/pl/math/vn_expm1f_1u6.c b/pl/math/vn_expm1f_1u6.c
new file mode 100644
index 0000000..bea491f
--- /dev/null
+++ b/pl/math/vn_expm1f_1u6.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_expm1f.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_expm1f, _ZGVnN4v_expm1f)
+#include "v_expm1f_1u6.c"
+#endif
diff --git a/pl/math/vn_log10_2u5.c b/pl/math/vn_log10_2u5.c
new file mode 100644
index 0000000..5f32c33
--- /dev/null
+++ b/pl/math/vn_log10_2u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log10.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_log10, _ZGVnN2v_log10)
+#include "v_log10_2u5.c"
+#endif
diff --git a/pl/math/vn_log10f_3u5.c b/pl/math/vn_log10f_3u5.c
new file mode 100644
index 0000000..2673ef5
--- /dev/null
+++ b/pl/math/vn_log10f_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log10f.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_log10f, _ZGVnN4v_log10f)
+#include "v_log10f_3u5.c"
+#endif
diff --git a/pl/math/vn_log1p_2u5.c b/pl/math/vn_log1p_2u5.c
new file mode 100644
index 0000000..3f4f8d1
--- /dev/null
+++ b/pl/math/vn_log1p_2u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log1p.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_log1p, _ZGVnN2v_log1p)
+#include "v_log1p_2u5.c"
+#endif
diff --git a/pl/math/vn_log1pf_2u1.c b/pl/math/vn_log1pf_2u1.c
new file mode 100644
index 0000000..a319bc9
--- /dev/null
+++ b/pl/math/vn_log1pf_2u1.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log1pf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_log1pf, _ZGVnN4v_log1pf)
+#include "v_log1pf_2u1.c"
+#endif
diff --git a/pl/math/vn_log2_3u.c b/pl/math/vn_log2_3u.c
new file mode 100644
index 0000000..a870392
--- /dev/null
+++ b/pl/math/vn_log2_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log2.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_log2, _ZGVnN2v_log2)
+#include "v_log2_3u.c"
+#endif
diff --git a/pl/math/vn_log2f_2u5.c b/pl/math/vn_log2f_2u5.c
new file mode 100644
index 0000000..b4a9cb7
--- /dev/null
+++ b/pl/math/vn_log2f_2u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_log2f.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_log2f, _ZGVnN4v_log2f)
+#include "v_log2f_2u5.c"
+#endif
diff --git a/pl/math/vn_sinh_3u.c b/pl/math/vn_sinh_3u.c
new file mode 100644
index 0000000..7c881de
--- /dev/null
+++ b/pl/math/vn_sinh_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_sinh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_sinh, _ZGVnN2v_sinh)
+#include "v_sinh_3u.c"
+#endif
diff --git a/pl/math/vn_sinhf_2u3.c b/pl/math/vn_sinhf_2u3.c
new file mode 100644
index 0000000..251e732
--- /dev/null
+++ b/pl/math/vn_sinhf_2u3.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_sinhf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_sinhf, _ZGVnN4v_sinhf)
+#include "v_sinhf_2u3.c"
+#endif
diff --git a/pl/math/vn_tan_3u5.c b/pl/math/vn_tan_3u5.c
new file mode 100644
index 0000000..a4efb06
--- /dev/null
+++ b/pl/math/vn_tan_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_tan.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_tan, _ZGVnN2v_tan)
+#include "v_tan_3u5.c"
+#endif
diff --git a/pl/math/vn_tanf_3u5.c b/pl/math/vn_tanf_3u5.c
new file mode 100644
index 0000000..a88cb40
--- /dev/null
+++ b/pl/math/vn_tanf_3u5.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_tanf.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_tanf, _ZGVnN4v_tanf)
+#include "v_tanf_3u5.c"
+#endif
diff --git a/pl/math/vn_tanh_3u.c b/pl/math/vn_tanh_3u.c
new file mode 100644
index 0000000..cb2746c
--- /dev/null
+++ b/pl/math/vn_tanh_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_tanh.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_tanh, _ZGVnN2v_tanh)
+#include "v_tanh_3u.c"
+#endif
diff --git a/pl/math/vn_tanhf_2u6.c b/pl/math/vn_tanhf_2u6.c
new file mode 100644
index 0000000..47f0a7f
--- /dev/null
+++ b/pl/math/vn_tanhf_2u6.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_tanhf.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS PL_ALIAS (__vn_tanhf, _ZGVnN4v_tanhf)
+#include "v_tanhf_2u6.c"
+#endif