diff options
Diffstat (limited to 'math/e_logf.c')
-rw-r--r-- | math/e_logf.c | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/math/e_logf.c b/math/e_logf.c new file mode 100644 index 0000000..484b02d --- /dev/null +++ b/math/e_logf.c @@ -0,0 +1,169 @@ +/* + * e_logf.c - single precision log function + * + * Copyright (C) 2009-2015, ARM Limited, All Rights Reserved + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of the Optimized Routines project + */ + +/* + * Algorithm was once taken from Cody & Waite, but has been munged + * out of all recognition by SGT. + */ + +#include "arm_math.h" +#include "math_private.h" +#include <math.h> +#include <errno.h> + + +float +ARM__logf(float X) +{ + int N = 0; + int aindex; + float a, x, s; + unsigned ix = fai(X); + + if (__builtin_expect((ix - 0x00800000) >= 0x7f800000 - 0x00800000, 0)) { + if ((ix << 1) > 0xff000000) /* NaN */ + return FLOAT_INFNAN(X); + if (ix == 0x7f800000) /* +inf */ + return X; + if (X < 0) { /* anything negative */ + return MATHERR_LOGF_NEG(X); + } + if (X == 0) { + return MATHERR_LOGF_0(X); + } + /* That leaves denormals. */ + N = -23; + X *= 0x1p+23F; + ix = fai(X); + } + + /* + * Separate X into three parts: + * - 2^N for some integer N + * - a number a of the form (1+k/8) for k=0,...,7 + * - a residual which we compute as s = (x-a)/(x+a), for + * x=X/2^N. + * + * We pick the _nearest_ (N,a) pair, so that (x-a) has magnitude + * at most 1/16. Hence, we must round things that are just + * _below_ a power of two up to the next power of two, so this + * isn't as simple as extracting the raw exponent of the FP + * number. Instead we must grab the exponent together with the + * top few bits of the mantissa, and round (in integers) there. + */ + { + int rounded = ix + 0x00080000; + int Nnew = (rounded >> 23) - 127; + aindex = (rounded >> 20) & 7; + a = fhex(0x3f800000 + (aindex << 20)); + N += Nnew; + x = fhex(ix - (Nnew << 23)); + } + + if (!N && !aindex) { + /* + * Use an alternative strategy for very small |x|, which + * avoids the 1ULP of relative error introduced in the + * computation of s. If our nearest (N,a) pair is N=0,a=1, + * that means we have -1/32 < x-a < 1/16, on which interval + * the ordinary series for log(1+z) (setting z-x-a) will + * converge adequately fast; so we can simply find an + * approximation to log(1+z)/z good on that interval and + * scale it by z on the way out. + * + * Coefficients generated by the command + +./auxiliary/remez.jl --variable=z --suffix=f -- '-1/BigFloat(32)' '+1/BigFloat(16)' 3 0 '(log1p(x)-x)/x^2' + + */ + float z = x - 1.0f; + float p = z*z * ( + -4.999999767382730053173434595877399055021398381370452534949864039404089549132551e-01f+z*(3.333416379155995401749506866323446447523793085809161350343357014272193712456391e-01f+z*(-2.501299948811686421962724839011563450757435183422532362736159418564644404218257e-01f+z*(1.903576945606738444146078468935429697455230136403008172485495359631510244557255e-01f))) + ); + + return z + p; + } + + /* + * Now we have N, a and x correct, so that |x-a| <= 1/16. + * Compute s. + * + * (Since |x+a| >= 2, this means that |s| will be at most 1/32.) + */ + s = (x - a) / (x + a); + + /* + * The point of computing s = (x-a)/(x+a) was that this makes x + * equal to a * (1+s)/(1-s). So we can now compute log(x) by + * means of computing log((1+s)/(1-s)) (which has a more + * efficiently converging series), and adding log(a) which we + * obtain from a lookup table. + * + * So our full answer to log(X) is now formed by adding together + * N*log(2) + log(a) + log((1+s)/(1-s)). + * + * Now log((1+s)/(1-s)) has the exact Taylor series + * + * 2s + 2s^3/3 + 2s^5/5 + ... + * + * and what we do is to compute all but the first term of that + * as a polynomial approximation in s^2, then add on the first + * term - and all the other bits and pieces above - in + * precision-and-a-half so as to keep the total error down. + */ + { + float s2 = s*s; + + /* + * We want a polynomial L(s^2) such that + * + * 2s + s^3*L(s^2) = log((1+s)/(1-s)) + * + * => L(s^2) = (log((1+s)/(1-s)) - 2s) / s^3 + * + * => L(z) = (log((1+sqrt(z))/(1-sqrt(z))) - 2*sqrt(z)) / sqrt(z)^3 + * + * The required range of the polynomial is only [0,1/32^2]. + * + * Our accuracy requirement for the polynomial approximation + * is that we don't want to introduce any error more than + * about 2^-23 times the _top_ bit of s. But the value of + * the polynomial has magnitude about s^3; and since |s| < + * 2^-5, this gives us |s^3/s| < 2^-10. In other words, + * our approximation only needs to be accurate to 13 bits or + * so before its error is lost in the noise when we add it + * to everything else. + * + * Coefficients generated by the command + +./auxiliary/remez.jl --variable=s2 --suffix=f -- '0' '1/BigFloat(32^2)' 1 0 '(abs(x) < 1e-20 ? BigFloat(2)/3 + 2*x/5 + 2*x^2/7 + 2*x^3/9 : (log((1+sqrt(x))/(1-sqrt(x)))-2*sqrt(x))/sqrt(x^3))' + + */ + float p = s * s2 * ( + 6.666666325680271091157649745099739739798210281016897722498744752867165138320995e-01f+s2*(4.002792299542401431889592846825025487338520940900492146195427243856292349188402e-01f) + ); + + static const float log2hi = 0x1.62ep-1F, log2lo = 0x1.0bfbe8p-15F; + static const float logahi[8] = { 0x0p+0F, 0x1.e26p-4F, 0x1.c8ep-3F, 0x1.46p-2F, 0x1.9f2p-2F, 0x1.f12p-2F, 0x1.1e8p-1F, 0x1.41cp-1F }; + static const float logalo[8] = { 0x0p+0F, 0x1.076e2ap-16F, 0x1.f7c79ap-15F, 0x1.8bc21cp-14F, 0x1.23eccp-14F, 0x1.1ebf5ep-15F, 0x1.7d79c2p-15F, 0x1.8fe846p-13F }; + return (N*log2hi + logahi[aindex]) + (2.0f*s + (N*log2lo + logalo[aindex] + p)); + } +} |