pl/math/log1p_2u.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

/*
 * Double-precision log(1+x) function.
 * Copyright (c) 2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "estrin.h"
#include "math_config.h"
#include "pl_sig.h"

#define Ln2Hi 0x1.62e42fefa3800p-1
#define Ln2Lo 0x1.ef35793c76730p-45
#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
#define OneMHfRt2Top                                                           \
  0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
#define OneTop12 0x3ff
#define BottomMask 0xffffffff
#define OneMHfRt2 0x3fd2bec333018866
#define Rt2MOne 0x3fda827999fcef32
#define AbsMask 0x7fffffffffffffff
#define ExpM63 0x3c00
#define C(i) __log1p_data.coeffs[i]

static inline double
eval_poly (double f)
{
  double f2 = f * f;
  double f4 = f2 * f2;
  double f8 = f4 * f4;
  return ESTRIN_18 (f, f2, f4, f8, f8 * f8, C);
}

/* log1p approximation using polynomial on reduced interval. Largest
   observed errors are near the lower boundary of the region where k
   is 0.
   Maximum measured error: 1.75ULP.
   log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
			       want -0x1.65fb8659a2f92p-2.  */
double
log1p (double x)
{
  uint64_t ix = asuint64 (x);
  uint64_t ia = ix & AbsMask;
  uint32_t ia16 = ia >> 48;

  /* Handle special cases first.  */
  if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
		|| ix == 0x8000000000000000))
    {
      if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
	{
	  /* x ==  -0 => log1p(x) =  -0.
	     x == Inf => log1p(x) = Inf.  */
	  return x;
	}
      if (ix == 0xbff0000000000000)
	{
	  /* x == -1 => log1p(x) = -Inf.  */
	  return __math_divzero (-1);
	  ;
	}
      if (ia16 >= 0x7ff0)
	{
	  /* x == +/-NaN => log1p(x) = NaN.  */
	  return __math_invalid (asdouble (ia));
	}
      /* x  <      -1 => log1p(x) =  NaN.
	 x ==    -Inf => log1p(x) =  NaN.  */
      return __math_invalid (x);
    }

  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
			   is in [sqrt(2)/2, sqrt(2)]):
     log1p(x) = k*log(2) + log1p(f).

     f may not be representable exactly, so we need a correction term:
     let m = round(1 + x), c = (1 + x) - m.
     c << m: at very small x, log1p(x) ~ x, hence:
     log(1+x) - log(m) ~ c/m.

     We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m.  */

  uint64_t sign = ix & ~AbsMask;
  if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
    {
      if (unlikely (ia16 <= ExpM63))
	{
	  /* If exponent of x <= -63 then shortcut the polynomial and avoid
	     underflow by just returning x, which is exactly rounded in this
	     region.  */
	  return x;
	}
      /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
	 logic below, as k = 0 and f = x and therefore representable exactly.
	 All we need is to return the polynomial.  */
      return fma (x, eval_poly (x) * x, x);
    }

  /* Obtain correctly scaled k by manipulation in the exponent.  */
  double m = x + 1;
  uint64_t mi = asuint64 (m);
  uint32_t u = (mi >> 32) + OneMHfRt2Top;
  int32_t k = (int32_t) (u >> 20) - OneTop12;

  /* Correction term c/m.  */
  double cm = (x - (m - 1)) / m;

  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
  uint32_t utop = (u & 0x000fffff) + HfRt2Top;
  uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
  double f = asdouble (u_red) - 1;

  /* Approximate log1p(x) on the reduced input using a polynomial. Because
     log1p(0)=0 we choose an approximation of the form:
	x + C0*x^2 + C1*x^3 + C2x^4 + ...
     Hence approximation has the form f + f^2 * P(f)
	where P(x) = C0 + C1*x + C2x^2 + ...  */
  double p = fma (f, eval_poly (f) * f, f);

  double kd = k;
  double y = fma (Ln2Lo, kd, cm);
  return y + fma (Ln2Hi, kd, p);
}

PL_SIG (S, D, 1, log1p, -0.9, 10.0)