pl/math/erfc_4u5.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

/*
 * Double-precision erfc(x) function.
 *
 * Copyright (c) 2019-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include <stdint.h>
#include <math.h>
#include <errno.h>
#include "math_config.h"
#include "pairwise_horner.h"

#define AbsMask (0x7fffffffffffffff)

#define xint __erfc_data.interval_bounds
#define PX __erfc_data.poly

/* Accurate exponential from optimized routines.  */
double
__exp_dd (double x, double xtail);

static inline double
eval_poly_horner (double z, int i)
{
  double z2 = z * z;
#define C(j) PX[i][j]
  return PAIRWISE_HORNER_12 (z, z2, C);
}

/* Accurate evaluation of exp(x^2)
   using compensated product (x^2 ~ x*x + e2)
   and the __exp_dd(y,d) routine, that is the
   computation of exp(y+d) with a small correction d<<y.  */
static inline double
eval_accurate_gaussian (double a)
{
  double e2;
  double a2 = a * a;
  double aa1 = -fma (0x1.0000002p27, a, -a);
  aa1 = fma (0x1.0000002p27, a, aa1);
  double aa2 = a - aa1;
  e2 = fma (-aa1, aa1, a2);
  e2 = fma (-aa1, aa2, e2);
  e2 = fma (-aa2, aa1, e2);
  e2 = fma (-aa2, aa2, e2);
  return __exp_dd (-a2, e2);
}

/* Approximation of erfc for |x| > 6.0.  */
static inline double
approx_erfc_hi (double x, int i)
{
  double a = fabs (x);
  double z = a - xint[i];
  double p = eval_poly_horner (z, i);
  double e_mx2 = eval_accurate_gaussian (a);
  return p * e_mx2;
}

static inline int
get_itv_idx (double x)
{
  /* Interval bounds are a logarithmic scale, i.e. interval n has
     lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain
     the interval index.  */
  double a = asdouble (asuint64 (x) & AbsMask);
  double z = a + 1.0;
  z = z * z;
  z = z * z;
  return (asuint64 (z) >> 52) - 1023;
}

/* Approximation of erfc for |x| < 6.0.  */
static inline double
approx_erfc_lo (double x, uint32_t sign, int i)
{
  double a = fabs (x);
  double z = a - xint[i];
  double p = eval_poly_horner (z, i);
  double e_mx2 = eval_accurate_gaussian (a);
  if (sign)
    return fma (-p, e_mx2, 2.0);
  else
    return p * e_mx2;
}

/* Top 12 bits of a double (sign and exponent bits).  */
static inline uint32_t
abstop12 (double x)
{
  return (asuint64 (x) >> 52) & 0x7ff;
}

/* Top 32 bits of a double.  */
static inline uint32_t
top32 (double x)
{
  return asuint64 (x) >> 32;
}

/* Fast erfc implementation.
   The approximation uses polynomial approximation of
   exp(x^2) * erfc(x) with fixed orders on 20 intervals.
   Maximum measured error is 4.05 ULPs:.
   erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2
			     want 0x1.ff84036f8f0b7p-2.  */
double
erfc (double x)
{
  /* Get top words.  */
  uint32_t ix = top32 (x); /* We need to compare at most 32 bits.  */
  uint32_t ia = ix & 0x7fffffff;
  uint32_t sign = ix >> 31;

  /* Handle special cases and small values with a single comparison:
     abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
     Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2
     Errno EDOM does not have to be set in case of erfc(nan).
     Only ERANGE may be set in case of underflow.
     Small values (|x|<small)
       |x|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-50) = 0x3c7)
       |x|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd).  */
  if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd))
    {
      if (abstop12 (x) >= 0x7ff)
	return (double) (sign << 1) + 1.0 / x; /* special cases.  */
      else
	return 1.0 - x; /* small case.  */
    }
  else if (ia < 0x40180000)
    { /* |x| < 6.0.  */
      return approx_erfc_lo (x, sign, get_itv_idx (x));
    }
  else if (sign)
    { /* x <= -6.0.  */
      return 2.0;
    }
  else if (ia < 0x403c0000)
    { /* 6.0 <= x < 28.  */
      return approx_erfc_hi (x, get_itv_idx (x));
    }
  else
    { /* x > 28.  */
      return __math_uflow (0);
    }
}