math/single/e_logf.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

/*
 * e_logf.c - single precision log function
 *
 * Copyright (c) 2009-2018, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/*
 * Algorithm was once taken from Cody & Waite, but has been munged
 * out of all recognition by SGT.
 */

#include <math.h>
#include <errno.h>
#include "math_private.h"

float
logf(float X)
{
    int N = 0;
    int aindex;
    float a, x, s;
    unsigned ix = fai(X);

    if (__builtin_expect((ix - 0x00800000) >= 0x7f800000 - 0x00800000, 0)) {
        if ((ix << 1) > 0xff000000) /* NaN */
            return FLOAT_INFNAN(X);
        if (ix == 0x7f800000)          /* +inf */
            return X;
        if (X < 0) {                   /* anything negative */
            return MATHERR_LOGF_NEG(X);
        }
        if (X == 0) {
            return MATHERR_LOGF_0(X);
        }
        /* That leaves denormals. */
        N = -23;
        X *= 0x1p+23F;
        ix = fai(X);
    }

    /*
     * Separate X into three parts:
     *  - 2^N for some integer N
     *  - a number a of the form (1+k/8) for k=0,...,7
     *  - a residual which we compute as s = (x-a)/(x+a), for
     *    x=X/2^N.
     *
     * We pick the _nearest_ (N,a) pair, so that (x-a) has magnitude
     * at most 1/16. Hence, we must round things that are just
     * _below_ a power of two up to the next power of two, so this
     * isn't as simple as extracting the raw exponent of the FP
     * number. Instead we must grab the exponent together with the
     * top few bits of the mantissa, and round (in integers) there.
     */
    {
        int rounded = ix + 0x00080000;
        int Nnew = (rounded >> 23) - 127;
        aindex = (rounded >> 20) & 7;
        a = fhex(0x3f800000 + (aindex << 20));
        N += Nnew;
        x = fhex(ix - (Nnew << 23));
    }

    if (!N && !aindex) {
        /*
         * Use an alternative strategy for very small |x|, which
         * avoids the 1ULP of relative error introduced in the
         * computation of s. If our nearest (N,a) pair is N=0,a=1,
         * that means we have -1/32 < x-a < 1/16, on which interval
         * the ordinary series for log(1+z) (setting z-x-a) will
         * converge adequately fast; so we can simply find an
         * approximation to log(1+z)/z good on that interval and
         * scale it by z on the way out.
         *
         * Coefficients generated by the command

./auxiliary/remez.jl --variable=z --suffix=f -- '-1/BigFloat(32)' '+1/BigFloat(16)' 3 0 '(log1p(x)-x)/x^2'

         */
        float z = x - 1.0f;
        float p = z*z * (
            -4.999999767382730053173434595877399055021398381370452534949864039404089549132551e-01f+z*(3.333416379155995401749506866323446447523793085809161350343357014272193712456391e-01f+z*(-2.501299948811686421962724839011563450757435183422532362736159418564644404218257e-01f+z*(1.903576945606738444146078468935429697455230136403008172485495359631510244557255e-01f)))
            );

        return z + p;
    }

    /*
     * Now we have N, a and x correct, so that |x-a| <= 1/16.
     * Compute s.
     *
     * (Since |x+a| >= 2, this means that |s| will be at most 1/32.)
     */
    s = (x - a) / (x + a);

    /*
     * The point of computing s = (x-a)/(x+a) was that this makes x
     * equal to a * (1+s)/(1-s). So we can now compute log(x) by
     * means of computing log((1+s)/(1-s)) (which has a more
     * efficiently converging series), and adding log(a) which we
     * obtain from a lookup table.
     *
     * So our full answer to log(X) is now formed by adding together
     * N*log(2) + log(a) + log((1+s)/(1-s)).
     *
     * Now log((1+s)/(1-s)) has the exact Taylor series
     *
     *   2s + 2s^3/3 + 2s^5/5 + ...
     *
     * and what we do is to compute all but the first term of that
     * as a polynomial approximation in s^2, then add on the first
     * term - and all the other bits and pieces above - in
     * precision-and-a-half so as to keep the total error down.
     */
    {
        float s2 = s*s;

        /*
         * We want a polynomial L(s^2) such that
         *
         *    2s + s^3*L(s^2) = log((1+s)/(1-s))
         *
         * => L(s^2) = (log((1+s)/(1-s)) - 2s) / s^3
         *
         * => L(z) = (log((1+sqrt(z))/(1-sqrt(z))) - 2*sqrt(z)) / sqrt(z)^3
         *
         * The required range of the polynomial is only [0,1/32^2].
         *
         * Our accuracy requirement for the polynomial approximation
         * is that we don't want to introduce any error more than
         * about 2^-23 times the _top_ bit of s. But the value of
         * the polynomial has magnitude about s^3; and since |s| <
         * 2^-5, this gives us |s^3/s| < 2^-10. In other words,
         * our approximation only needs to be accurate to 13 bits or
         * so before its error is lost in the noise when we add it
         * to everything else.
         *
         * Coefficients generated by the command

./auxiliary/remez.jl --variable=s2 --suffix=f -- '0' '1/BigFloat(32^2)' 1 0 '(abs(x) < 1e-20 ? BigFloat(2)/3 + 2*x/5 + 2*x^2/7 + 2*x^3/9 : (log((1+sqrt(x))/(1-sqrt(x)))-2*sqrt(x))/sqrt(x^3))'

         */
        float p = s * s2 * (
            6.666666325680271091157649745099739739798210281016897722498744752867165138320995e-01f+s2*(4.002792299542401431889592846825025487338520940900492146195427243856292349188402e-01f)
            );

        static const float log2hi = 0x1.62ep-1F, log2lo = 0x1.0bfbe8p-15F;
        static const float logahi[8] = { 0x0p+0F, 0x1.e26p-4F, 0x1.c8ep-3F, 0x1.46p-2F, 0x1.9f2p-2F, 0x1.f12p-2F, 0x1.1e8p-1F, 0x1.41cp-1F };
        static const float logalo[8] = { 0x0p+0F, 0x1.076e2ap-16F, 0x1.f7c79ap-15F, 0x1.8bc21cp-14F, 0x1.23eccp-14F, 0x1.1ebf5ep-15F, 0x1.7d79c2p-15F, 0x1.8fe846p-13F };
        return (N*log2hi + logahi[aindex]) + (2.0f*s + (N*log2lo + logalo[aindex] + p));
    }
}