pl/math/log1p_2u.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

/*
 * Double-precision log(1+x) function.
 *
 * Copyright (c) 2022-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "estrin.h"
#include "math_config.h"
#include "pl_sig.h"
#include "pl_test.h"

#define Ln2Hi 0x1.62e42fefa3800p-1
#define Ln2Lo 0x1.ef35793c76730p-45
#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
#define OneMHfRt2Top                                                           \
  0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
#define OneTop12 0x3ff
#define BottomMask 0xffffffff
#define OneMHfRt2 0x3fd2bec333018866
#define Rt2MOne 0x3fda827999fcef32
#define AbsMask 0x7fffffffffffffff
#define ExpM63 0x3c00
#define C(i) __log1p_data.coeffs[i]

static inline double
eval_poly (double f)
{
  double f2 = f * f;
  double f4 = f2 * f2;
  double f8 = f4 * f4;
  return ESTRIN_18 (f, f2, f4, f8, f8 * f8, C);
}

/* log1p approximation using polynomial on reduced interval. Largest
   observed errors are near the lower boundary of the region where k
   is 0.
   Maximum measured error: 1.75ULP.
   log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
			       want -0x1.65fb8659a2f92p-2.  */
double
log1p (double x)
{
  uint64_t ix = asuint64 (x);
  uint64_t ia = ix & AbsMask;
  uint32_t ia16 = ia >> 48;

  /* Handle special cases first.  */
  if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
		|| ix == 0x8000000000000000))
    {
      if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
	{
	  /* x ==  -0 => log1p(x) =  -0.
	     x == Inf => log1p(x) = Inf.  */
	  return x;
	}
      if (ix == 0xbff0000000000000)
	{
	  /* x == -1 => log1p(x) = -Inf.  */
	  return __math_divzero (-1);
	  ;
	}
      if (ia16 >= 0x7ff0)
	{
	  /* x == +/-NaN => log1p(x) = NaN.  */
	  return __math_invalid (asdouble (ia));
	}
      /* x  <      -1 => log1p(x) =  NaN.
	 x ==    -Inf => log1p(x) =  NaN.  */
      return __math_invalid (x);
    }

  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
			   is in [sqrt(2)/2, sqrt(2)]):
     log1p(x) = k*log(2) + log1p(f).

     f may not be representable exactly, so we need a correction term:
     let m = round(1 + x), c = (1 + x) - m.
     c << m: at very small x, log1p(x) ~ x, hence:
     log(1+x) - log(m) ~ c/m.

     We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m.  */

  uint64_t sign = ix & ~AbsMask;
  if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
    {
      if (unlikely (ia16 <= ExpM63))
	{
	  /* If exponent of x <= -63 then shortcut the polynomial and avoid
	     underflow by just returning x, which is exactly rounded in this
	     region.  */
	  return x;
	}
      /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
	 logic below, as k = 0 and f = x and therefore representable exactly.
	 All we need is to return the polynomial.  */
      return fma (x, eval_poly (x) * x, x);
    }

  /* Obtain correctly scaled k by manipulation in the exponent.  */
  double m = x + 1;
  uint64_t mi = asuint64 (m);
  uint32_t u = (mi >> 32) + OneMHfRt2Top;
  int32_t k = (int32_t) (u >> 20) - OneTop12;

  /* Correction term c/m.  */
  double cm = (x - (m - 1)) / m;

  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
  uint32_t utop = (u & 0x000fffff) + HfRt2Top;
  uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
  double f = asdouble (u_red) - 1;

  /* Approximate log1p(x) on the reduced input using a polynomial. Because
     log1p(0)=0 we choose an approximation of the form:
	x + C0*x^2 + C1*x^3 + C2x^4 + ...
     Hence approximation has the form f + f^2 * P(f)
	where P(x) = C0 + C1*x + C2x^2 + ...  */
  double p = fma (f, eval_poly (f) * f, f);

  double kd = k;
  double y = fma (Ln2Lo, kd, cm);
  return y + fma (Ln2Hi, kd, p);
}

PL_SIG (S, D, 1, log1p, -0.9, 10.0)
PL_TEST_ULP (log1p, 1.26)
PL_TEST_INTERVAL (log1p, -10.0, 10.0, 10000)
PL_TEST_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
PL_TEST_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
PL_TEST_INTERVAL (log1p, 0.001, 1.0, 50000)
PL_TEST_INTERVAL (log1p, 0.0, -0x1p-23, 50000)
PL_TEST_INTERVAL (log1p, -0x1p-23, -0.001, 50000)
PL_TEST_INTERVAL (log1p, -0.001, -1.0, 50000)
PL_TEST_INTERVAL (log1p, -1.0, inf, 5000)