author     Pierre Blanchard <pierre.blanchard@arm.com>   2022-03-25 15:04:38 +0000
committer  Szabolcs Nagy <szabolcs.nagy@arm.com>         2022-03-25 15:54:23 +0000
commit     82c8c8aeb2875223f63fc83187b0e8dd36fc8afe
tree       f33fe353b716ee607ccf5613bbcd446f43ed4c29
parent     4c32619682de9d8632fe039153a3e26a6f095482
download   arm-optimized-routines-82c8c8aeb2875223f63fc83187b0e8dd36fc8afe.tar.gz
Add build system and infra for the pl directory
- pl/ is built from the top-level Makefile by adding pl to SUBS.
- PLSUBS lists all pl/ subdirectories that can be built; it only contains math for now. Modify this list in the top-level config.mk.
- pl libraries and infrastructure are built in build/pl/.
- As a result, math/ and pl/math generate separate test and bench binaries.
- Use the infrastructure provided in math/test to test and profile pl/math routines. The build system ensures the appropriate header files are first copied to build/pl/include/test to define wrappers and entries in ulp and mathbench.
- Copied scalar erff from math/ to pl/math/ to show that the build system is functional.
- pl mathlib libraries are built separately from the main/portable mathlib libraries and installed alongside them.
Diffstat (limited to 'pl/math')
-rw-r--r--  pl/math/Dir.mk                              151
-rw-r--r--  pl/math/erff_1u5.c                          103
-rw-r--r--  pl/math/erff_data.c                          16
-rw-r--r--  pl/math/include/mathlib.h                    26
-rw-r--r--  pl/math/math_config.h                       326
-rw-r--r--  pl/math/math_errf.c                          80
-rw-r--r--  pl/math/test/mathbench_funcs.h                7
-rw-r--r--  pl/math/test/mathbench_wrappers.h             7
-rwxr-xr-x  pl/math/test/runulp.sh                       45
-rw-r--r--  pl/math/test/testcases/directed/erff.tst     17
-rw-r--r--  pl/math/test/testcases/random/float.tst       6
-rw-r--r--  pl/math/test/ulp_funcs.h                      7
-rw-r--r--  pl/math/test/ulp_wrappers.h                  27
13 files changed, 818 insertions, 0 deletions
diff --git a/pl/math/Dir.mk b/pl/math/Dir.mk
new file mode 100644
index 0000000..13ecf87
--- /dev/null
+++ b/pl/math/Dir.mk
@@ -0,0 +1,151 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2019-2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+PLM := $(srcdir)/pl/math
+AOR := $(srcdir)/math
+B := $(srcdir)/build/pl/math
+
+math-lib-srcs := $(wildcard $(PLM)/*.[cS])
+math-test-srcs := \
+ $(AOR)/test/mathtest.c \
+ $(AOR)/test/mathbench.c \
+ $(AOR)/test/ulp.c \
+
+math-test-host-srcs := $(wildcard $(AOR)/test/rtest/*.[cS])
+
+math-includes := $(patsubst $(PLM)/%,build/pl/%,$(wildcard $(PLM)/include/*.h))
+math-test-includes := $(patsubst $(PLM)/%,build/pl/include/%,$(wildcard $(PLM)/test/*.h))
+
+math-libs := \
+ build/pl/lib/libmathlib.so \
+ build/pl/lib/libmathlib.a \
+
+math-tools := \
+ build/pl/bin/mathtest \
+ build/pl/bin/mathbench \
+ build/pl/bin/mathbench_libc \
+ build/pl/bin/runulp.sh \
+ build/pl/bin/ulp \
+
+math-host-tools := \
+ build/pl/bin/rtest \
+
+math-lib-objs := $(patsubst $(PLM)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
+math-test-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-srcs)))
+math-host-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
+math-target-objs := $(math-lib-objs) $(math-test-objs)
+math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)
+
+pl/math-files := \
+ $(math-objs) \
+ $(math-libs) \
+ $(math-tools) \
+ $(math-host-tools) \
+ $(math-includes) \
+ $(math-test-includes) \
+
+all-pl/math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes)
+
+$(math-objs): $(math-includes) $(math-test-includes)
+$(math-objs): CFLAGS_PL += $(math-cflags)
+$(B)/test/mathtest.o: CFLAGS_PL += -fmath-errno
+$(math-host-objs): CC = $(HOST_CC)
+$(math-host-objs): CFLAGS_PL = $(HOST_CFLAGS)
+
+$(B)/test/ulp.o: $(AOR)/test/ulp.h
+
+build/pl/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -shared -o $@ $^
+
+build/pl/lib/libmathlib.a: $(math-lib-objs)
+ rm -f $@
+ $(AR) rc $@ $^
+ $(RANLIB) $@
+
+$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
+$(math-tools): LDLIBS += $(math-ldlibs) -lm
+
+# Some targets to build pl/math/test from math/test sources
+build/pl/math/test/%.o: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.o: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+# Some targets to build pl/ sources using appropriate flags
+build/pl/%.o: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.o: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/bin/rtest: $(math-host-objs)
+ $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
+
+build/pl/bin/mathtest: $(B)/test/mathtest.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/bin/mathbench: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+# This is not ideal, but allows custom symbols in mathbench to get resolved.
+build/pl/bin/mathbench_libc: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/pl/lib/libmathlib.a -lm
+
+build/pl/bin/ulp: $(B)/test/ulp.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/include/%.h: $(PLM)/include/%.h
+ cp $< $@
+
+build/pl/include/test/%.h: $(PLM)/test/%.h
+ cp $< $@
+
+build/pl/bin/%.sh: $(PLM)/test/%.sh
+ cp $< $@
+
+math-tests := $(wildcard $(PLM)/test/testcases/directed/*.tst)
+math-rtests := $(wildcard $(PLM)/test/testcases/random/*.tst)
+
+check-pl/math-test: $(math-tools)
+ cat $(math-tests) | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+check-pl/math-rtest: $(math-host-tools) $(math-tools)
+ cat $(math-rtests) | build/pl/bin/rtest | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+check-pl/math-ulp: $(math-tools)
+ ULPFLAGS="$(math-ulpflags)" build/pl/bin/runulp.sh $(EMULATOR)
+
+check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp
+
+$(DESTDIR)$(libdir)/pl/%.so: build/pl/lib/%.so
+ $(INSTALL) -D $< $@
+
+$(DESTDIR)$(libdir)/pl/%: build/pl/lib/%
+ $(INSTALL) -m 644 -D $< $@
+
+$(DESTDIR)$(includedir)/pl/%: build/pl/include/%
+ $(INSTALL) -m 644 -D $< $@
+
+install-pl/math: \
+ $(math-libs:build/pl/lib/%=$(DESTDIR)$(libdir)/pl/%) \
+ $(math-includes:build/pl/include/%=$(DESTDIR)$(includedir)/pl/%)
+
+clean-pl/math:
+ rm -f $(pl/math-files)
+
+.PHONY: all-pl/math check-pl/math-test check-pl/math-rtest check-pl/math-ulp check-pl/math install-pl/math clean-pl/math
diff --git a/pl/math/erff_1u5.c b/pl/math/erff_1u5.c
new file mode 100644
index 0000000..1073603
--- /dev/null
+++ b/pl/math/erff_1u5.c
@@ -0,0 +1,103 @@
+/*
+ * Single-precision erf(x) function.
+ *
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
+#define A __erff_data.erff_poly_A
+#define B __erff_data.erff_poly_B
+
+/* Top 12 bits of a float. */
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20;
+}
+
+/* Efficient implementation of erff using either a pure polynomial approximation
+ or the exponential of a polynomial. Worst-case error is 1.09ulps at
+ 0x1.c111acp-1. */
+float
+erff (float x)
+{
+ float r, x2, u;
+
+ /* Get top word. */
+ uint32_t ix = asuint (x);
+ uint32_t sign = ix >> 31;
+ uint32_t ia12 = top12 (x) & 0x7ff;
+
+ /* The limit of both intervals is 0.875 for performance reasons, but the
+    coefficients were computed on [0.0, 0.921875] and [0.921875, 4.0], which
+    raised the worst-case error from 0.94 to 1.1 ulp. */
+ if (ia12 < 0x3f6)
+ { /* a = |x| < 0.875. */
+
+ /* Tiny and subnormal cases. */
+ if (unlikely (ia12 < 0x318))
+ { /* |x| < 2^(-28). */
+ if (unlikely (ia12 < 0x040))
+ { /* |x| < 2^(-119). */
+ float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
+ return check_uflowf (y);
+ }
+ return x + TwoOverSqrtPiMinusOne * x;
+ }
+
+ x2 = x * x;
+
+ /* Normalized cases (|x| < 0.921875) - Use Horner scheme for x+x*P(x^2).
+ */
+ r = A[5];
+ r = fmaf (r, x2, A[4]);
+ r = fmaf (r, x2, A[3]);
+ r = fmaf (r, x2, A[2]);
+ r = fmaf (r, x2, A[1]);
+ r = fmaf (r, x2, A[0]);
+ r = fmaf (r, x, x);
+ }
+ else if (ia12 < 0x408)
+ { /* |x| < 4.0 - Use a custom Estrin scheme. */
+
+ float a = fabsf (x);
+ /* Use Estrin scheme on high order (small magnitude) coefficients. */
+ r = fmaf (B[6], a, B[5]);
+ u = fmaf (B[4], a, B[3]);
+ x2 = x * x;
+ r = fmaf (r, x2, u);
+ /* Then switch to pure Horner scheme. */
+ r = fmaf (r, a, B[2]);
+ r = fmaf (r, a, B[1]);
+ r = fmaf (r, a, B[0]);
+ r = fmaf (r, a, a);
+ /* Single precision exponential with ~0.5ulps ensures erff has maximum
+ relative error below 1ulp on [0.921875, 4.0] and below 1.1ulps on
+ [0.875, 4.0]. */
+ r = expf (-r);
+ /* Explicit copysign (calling copysignf increases latency). */
+ if (sign)
+ r = -1.0f + r;
+ else
+ r = 1.0f - r;
+ }
+ else
+ { /* |x| >= 4.0. */
+
+ /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
+ if (unlikely (ia12 >= 0x7f8))
+ return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
+
+ /* Explicit copysign (calling copysignf increases latency). */
+ if (sign)
+ r = -1.0f;
+ else
+ r = 1.0f;
+ }
+ return r;
+}
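
For reference, a minimal standalone spot check of the worst-case input quoted in the header comment (0x1.c111acp-1). It is not part of this commit and is not the project's accuracy measurement; the real harness is build/pl/bin/ulp driven by runulp.sh below. Link against build/pl/lib/libmathlib.a to exercise this erff; otherwise the system libm implementation is called.

  /* Illustrative spot check only; real ULP measurement is done by ulp.  */
  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    float x = 0x1.c111acp-1f;        /* documented worst-case input */
    float got = erff (x);
    double want = erf ((double) x);  /* double-precision reference  */
    printf ("x=%a erff=%a ref=%a diff=%a\n",
            (double) x, (double) got, want, (double) got - want);
    return 0;
  }
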
diff --git a/pl/math/erff_data.c b/pl/math/erff_data.c
new file mode 100644
index 0000000..eeb0b20
--- /dev/null
+++ b/pl/math/erff_data.c
@@ -0,0 +1,16 @@
+/*
+ * Data for approximation of erff.
+ *
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Minimax approximation of erff. */
+const struct erff_data __erff_data
+ = {.erff_poly_A = {0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
+ -0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f},
+ .erff_poly_B
+ = {0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f, -0x1.8d6300p-6f,
+ 0x1.fd1336p-9f, -0x1.91d2ccp-12f, 0x1.222900p-16f}};
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
new file mode 100644
index 0000000..fc8eb0b
--- /dev/null
+++ b/pl/math/include/mathlib.h
@@ -0,0 +1,26 @@
+/*
+ * Public API.
+ *
+ * Copyright (c) 2015-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATHLIB_H
+#define _MATHLIB_H
+
+float erff (float);
+
+#if __aarch64__
+#if __GNUC__ >= 5
+typedef __Float32x4_t __f32x4_t;
+typedef __Float64x2_t __f64x2_t;
+#elif __clang_major__*100+__clang_minor__ >= 305
+typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
+typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
+#else
+#error Unsupported compiler
+#endif
+
+#endif
+
+#endif
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
new file mode 100644
index 0000000..0790416
--- /dev/null
+++ b/pl/math/math_config.h
@@ -0,0 +1,326 @@
+/*
+ * Configuration for math routines.
+ *
+ * Copyright (c) 2017-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATH_CONFIG_H
+#define _MATH_CONFIG_H
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef WANT_ROUNDING
+/* If defined to 1, return correct results for special cases in non-nearest
+ rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
+ This may be set to 0 if there is no fenv support or if math functions only
+ get called in round to nearest mode. */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* If defined to 1, set errno in math functions according to ISO C. Many math
+ libraries do not set errno, so this is 0 by default. It may need to be
+ set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
+# define WANT_ERRNO 0
+#endif
+#ifndef WANT_ERRNO_UFLOW
+/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
+# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
+#endif
+
+/* Compiler can inline round as a single instruction. */
+#ifndef HAVE_FAST_ROUND
+# if __aarch64__
+# define HAVE_FAST_ROUND 1
+# else
+# define HAVE_FAST_ROUND 0
+# endif
+#endif
+
+/* Compiler can inline lround, but not (long)round(x). */
+#ifndef HAVE_FAST_LROUND
+# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
+# define HAVE_FAST_LROUND 1
+# else
+# define HAVE_FAST_LROUND 0
+# endif
+#endif
+
+/* Compiler can inline fma as a single instruction. */
+#ifndef HAVE_FAST_FMA
+# if defined FP_FAST_FMA || __aarch64__
+# define HAVE_FAST_FMA 1
+# else
+# define HAVE_FAST_FMA 0
+# endif
+#endif
+
+/* Provide *_finite symbols and some of the glibc hidden symbols
+ so libmathlib can be used with binaries compiled against glibc
+ to interpose math functions with both static and dynamic linking. */
+#ifndef USE_GLIBC_ABI
+# if __GNUC__
+# define USE_GLIBC_ABI 1
+# else
+# define USE_GLIBC_ABI 0
+# endif
+#endif
+
+/* Optionally used extensions. */
+#ifdef __GNUC__
+# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
+# define NOINLINE __attribute__ ((noinline))
+# define UNUSED __attribute__ ((unused))
+# define likely(x) __builtin_expect (!!(x), 1)
+# define unlikely(x) __builtin_expect (x, 0)
+# if __GNUC__ >= 9
+# define attribute_copy(f) __attribute__ ((copy (f)))
+# else
+# define attribute_copy(f)
+# endif
+# define strong_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+ attribute_copy (f);
+#else
+# define HIDDEN
+# define NOINLINE
+# define UNUSED
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+#if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+ the semantics documented below. */
+# define TOINT_INTRINSICS 1
+
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static inline double_t
+roundtoint (double_t x)
+{
+ return round (x);
+}
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+ consistently with roundtoint. If the result is not representible in an
+ int32_t then the semantics is unspecified. */
+static inline int32_t
+converttoint (double_t x)
+{
+# if HAVE_FAST_LROUND
+ return lround (x);
+# else
+ return (long) round (x);
+# endif
+}
+#endif
+
+static inline uint32_t
+asuint (float f)
+{
+ union
+ {
+ float f;
+ uint32_t i;
+ } u = {f};
+ return u.i;
+}
+
+static inline float
+asfloat (uint32_t i)
+{
+ union
+ {
+ uint32_t i;
+ float f;
+ } u = {i};
+ return u.f;
+}
+
+static inline uint64_t
+asuint64 (double f)
+{
+ union
+ {
+ double f;
+ uint64_t i;
+ } u = {f};
+ return u.i;
+}
+
+static inline double
+asdouble (uint64_t i)
+{
+ union
+ {
+ uint64_t i;
+ double f;
+ } u = {i};
+ return u.f;
+}
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1
+#endif
+static inline int
+issignalingf_inline (float x)
+{
+ uint32_t ix = asuint (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7fc00000) == 0x7fc00000;
+ return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
+}
+
+static inline int
+issignaling_inline (double x)
+{
+ uint64_t ix = asuint64 (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
+ return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
+}
+
+#if __aarch64__ && __GNUC__
+/* Prevent the optimization of a floating-point expression. */
+static inline float
+opt_barrier_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+ return x;
+}
+static inline double
+opt_barrier_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+ return x;
+}
+/* Force the evaluation of a floating-point expression for its side-effect. */
+static inline void
+force_eval_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+}
+static inline void
+force_eval_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+}
+#else
+static inline float
+opt_barrier_float (float x)
+{
+ volatile float y = x;
+ return y;
+}
+static inline double
+opt_barrier_double (double x)
+{
+ volatile double y = x;
+ return y;
+}
+static inline void
+force_eval_float (float x)
+{
+ volatile float y UNUSED = x;
+}
+static inline void
+force_eval_double (double x)
+{
+ volatile double y UNUSED = x;
+}
+#endif
+
+/* Evaluate an expression as the specified type. Normally a type
+   cast should be enough, but compilers implement non-standard
+   excess-precision handling, so when FLT_EVAL_METHOD != 0 these
+   functions may need to be customized. */
+static inline float
+eval_as_float (float x)
+{
+ return x;
+}
+static inline double
+eval_as_double (double x)
+{
+ return x;
+}
+
+/* Error handling tail calls for special cases, with a sign argument.
+ The sign of the return value is set if the argument is non-zero. */
+
+/* The result overflows. */
+HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero. */
+HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows. */
+HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero. */
+HIDDEN double __math_divzero (uint32_t);
+
+/* Error handling using input checking. */
+
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN double __math_invalid (double);
+
+/* Error handling using output checking, only for errno setting. */
+
+/* Check if the result overflowed to infinity. */
+HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0. */
+HIDDEN double __math_check_uflow (double);
+
+/* Check if the result overflowed to infinity. */
+static inline double
+check_oflow (double x)
+{
+ return WANT_ERRNO ? __math_check_oflow (x) : x;
+}
+
+/* Check if the result underflowed to 0. */
+static inline double
+check_uflow (double x)
+{
+ return WANT_ERRNO ? __math_check_uflow (x) : x;
+}
+
+/* Check if the result overflowed to infinity. */
+HIDDEN float __math_check_oflowf (float);
+/* Check if the result underflowed to 0. */
+HIDDEN float __math_check_uflowf (float);
+
+/* Check if the result overflowed to infinity. */
+static inline float
+check_oflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_oflowf (x) : x;
+}
+
+/* Check if the result underflowed to 0. */
+static inline float
+check_uflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_uflowf (x) : x;
+}
+
+extern const struct erff_data
+{
+ float erff_poly_A[6];
+ float erff_poly_B[7];
+} __erff_data HIDDEN;
+
+#endif
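
The asuint/asfloat helpers above reinterpret float bits through a union; erff_1u5.c builds on them via top12 () to classify inputs by their exponent bits. A minimal sketch of the round trip, assuming it is compiled within pl/math so "math_config.h" is on the include path:

  /* Bit-level view of a float via asuint/asfloat (illustrative only).  */
  #include <stdio.h>
  #include "math_config.h"

  int
  main (void)
  {
    float x = 1.0f;
    uint32_t ix = asuint (x);            /* 0x3f800000 for 1.0f           */
    uint32_t ia12 = (ix >> 20) & 0x7ff;  /* exponent bits, as in top12 () */
    printf ("bits=%08x ia12=%03x roundtrip=%a\n",
            (unsigned) ix, (unsigned) ia12, (double) asfloat (ix));
    return 0;
  }
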
diff --git a/pl/math/math_errf.c b/pl/math/math_errf.c
new file mode 100644
index 0000000..f2aad46
--- /dev/null
+++ b/pl/math/math_errf.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision math error handling.
+ *
+ * Copyright (c) 2017-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+#include <errno.h>
+/* NOINLINE reduces code size and avoids making math functions non-leaf
+ when the error handling is inlined. */
+NOINLINE static float
+with_errnof (float y, int e)
+{
+ errno = e;
+ return y;
+}
+#else
+#define with_errnof(x, e) (x)
+#endif
+
+/* NOINLINE reduces code size. */
+NOINLINE static float
+xflowf (uint32_t sign, float y)
+{
+ y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_uflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1p-95f);
+}
+
+#if WANT_ERRNO_UFLOW
+/* Underflows to zero in some non-nearest rounding mode; setting errno
+   is valid even if the result is non-zero but in the subnormal range. */
+HIDDEN float
+__math_may_uflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1.4p-75f);
+}
+#endif
+
+HIDDEN float
+__math_oflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1p97f);
+}
+
+HIDDEN float
+__math_divzerof (uint32_t sign)
+{
+ float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_invalidf (float x)
+{
+ float y = (x - x) / (x - x);
+ return isnan (x) ? y : with_errnof (y, EDOM);
+}
+
+/* Check result and set errno if necessary. */
+
+HIDDEN float
+__math_check_uflowf (float y)
+{
+ return y == 0.0f ? with_errnof (y, ERANGE) : y;
+}
+
+HIDDEN float
+__math_check_oflowf (float y)
+{
+ return isinf (y) ? with_errnof (y, ERANGE) : y;
+}
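
The xflowf helper above produces its error result by performing a multiplication that genuinely overflows or underflows at run time, with opt_barrier_float keeping the compiler from folding it away; errno is only set when WANT_ERRNO is enabled. A minimal sketch of the same idea, using a volatile in place of opt_barrier_float and <fenv.h> to observe the raised flag (illustrative, not library code):

  /* Force a real underflow and test the FP exception flag.  */
  #include <fenv.h>
  #include <stdio.h>

  int
  main (void)
  {
    volatile float tiny = 0x1p-95f;  /* same constant as __math_uflowf    */
    feclearexcept (FE_ALL_EXCEPT);
    float y = tiny * tiny;           /* 2^-190 underflows to 0 (nearest)  */
    printf ("y=%a underflow=%d\n", (double) y,
            fetestexcept (FE_UNDERFLOW) != 0);
    return 0;
  }
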
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
new file mode 100644
index 0000000..64c1300
--- /dev/null
+++ b/pl/math/test/mathbench_funcs.h
@@ -0,0 +1,7 @@
+/*
+ * Function entries for mathbench.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+F (erff, -4.0, 4.0)
diff --git a/pl/math/test/mathbench_wrappers.h b/pl/math/test/mathbench_wrappers.h
new file mode 100644
index 0000000..8f85079
--- /dev/null
+++ b/pl/math/test/mathbench_wrappers.h
@@ -0,0 +1,7 @@
+/*
+ * Function wrappers for mathbench.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
new file mode 100755
index 0000000..5d29f21
--- /dev/null
+++ b/pl/math/test/runulp.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# ULP error check script.
+#
+# Copyright (c) 2019-2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+#set -x
+set -eu
+
+# cd to bin directory.
+cd "${0%/*}"
+
+rmodes='n'
+flags="${ULPFLAGS:--q}"
+emu="$@"
+
+FAIL=0
+PASS=0
+
+t() {
+ [ $r = "n" ] && Lt=$L || Lt=$Ldir
+ $emu ./ulp -r $r -e $Lt $flags "$@" && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+}
+
+Ldir=0.5
+for r in $rmodes
+do
+
+L=0.6
+Ldir=0.9
+t erff 0 0xffff0000 10000
+t erff 0x1p-127 0x1p-26 40000
+t erff -0x1p-127 -0x1p-26 40000
+t erff 0x1p-26 0x1p3 40000
+t erff -0x1p-26 -0x1p3 40000
+t erff 0 inf 40000
+Ldir=0.5
+
+done
+
+[ 0 -eq $FAIL ] || {
+ echo "FAILED $FAIL PASSED $PASS"
+ exit 1
+}
diff --git a/pl/math/test/testcases/directed/erff.tst b/pl/math/test/testcases/directed/erff.tst
new file mode 100644
index 0000000..48a3d6e
--- /dev/null
+++ b/pl/math/test/testcases/directed/erff.tst
@@ -0,0 +1,17 @@
+; erff.tst
+;
+; Copyright (c) 2007-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erff op1=7fc00001 result=7fc00001 errno=0
+func=erff op1=ffc00001 result=7fc00001 errno=0
+func=erff op1=7f800001 result=7fc00001 errno=0 status=i
+func=erff op1=ff800001 result=7fc00001 errno=0 status=i
+func=erff op1=7f800000 result=3f800000 errno=0
+func=erff op1=ff800000 result=bf800000 errno=0
+func=erff op1=00000000 result=00000000 errno=ERANGE
+func=erff op1=80000000 result=80000000 errno=ERANGE
+func=erff op1=00000001 result=00000001 errno=0 status=ux
+func=erff op1=80000001 result=80000001 errno=0 status=ux
+func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
+func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
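
Operands and results in these directed tests are raw IEEE-754 single-precision bit patterns; the suffix after the dot (e.g. .3a0) carries extra bits of the reference result that mathtest uses for error accounting. For example, op1=3f800000 is 1.0f and result=3f57bb3d decodes to roughly 0.8427, i.e. erf(1). A small standalone decoder (it uses a local union rather than the library's asfloat so it compiles on its own):

  /* Decode the hex bit patterns used in the .tst files (illustrative).  */
  #include <stdint.h>
  #include <stdio.h>

  static float
  bits_to_float (uint32_t i)
  {
    union { uint32_t i; float f; } u = { i };
    return u.f;
  }

  int
  main (void)
  {
    /* func=erff op1=3f800000 result=3f57bb3d.3a0  */
    printf ("op1=%a result=%a\n",
            (double) bits_to_float (0x3f800000u),  /* 1.0f             */
            (double) bits_to_float (0x3f57bb3du)); /* ~0.8427 = erf(1) */
    return 0;
  }
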
diff --git a/pl/math/test/testcases/random/float.tst b/pl/math/test/testcases/random/float.tst
new file mode 100644
index 0000000..caf0bd3
--- /dev/null
+++ b/pl/math/test/testcases/random/float.tst
@@ -0,0 +1,6 @@
+!! float.tst - Random test case specification for SP functions
+!!
+!! Copyright (c) 2022, Arm Limited.
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+test erff 10000
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
new file mode 100644
index 0000000..68d9ec7
--- /dev/null
+++ b/pl/math/test/ulp_funcs.h
@@ -0,0 +1,7 @@
+/*
+ * Function entries for ulp.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+F1 (erf)
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
new file mode 100644
index 0000000..38e0f63
--- /dev/null
+++ b/pl/math/test/ulp_wrappers.h
@@ -0,0 +1,27 @@
+/*
+ * Function wrappers for ulp.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if USE_MPFR
+static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
+static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
+#endif
+
+/* A bit of a hack: call vector functions twice with the same
+ input in lane 0 but a different value in other lanes: once
+ with an in-range value and then with a special case value. */
+static int secondcall;
+
+/* Wrappers for vector functions. */
+#if __aarch64__ && WANT_VMATH
+typedef __f32x4_t v_float;
+typedef __f64x2_t v_double;
+static const float fv[2] = {1.0f, -INFINITY};
+static const double dv[2] = {1.0, -INFINITY};
+static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; }
+static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; }
+
+#endif
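
No scalar or vector wrappers are defined here yet; argf/argd only become useful once vector routines are added, at which point a ulp wrapper builds the vector argument and reads back lane 0, with the secondcall flag selecting which special value fills the unmeasured lanes. A hypothetical sketch (the routine name __v_erff is made up for illustration and does not exist in this commit):

  /* Hypothetical ulp wrapper for a future vector erff.  */
  static float
  v_erff (float x)
  {
    return __v_erff (argf (x))[0];
  }
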