pl/math/tanf_3u3.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

/*
 * Single-precision scalar tan(x) function.
 *
 * Copyright (c) 2021-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
#include "math_config.h"
#include "pl_sig.h"
#include "pl_test.h"
#include "pairwise_hornerf.h"

/* Useful constants.  */
#define NegPio2_1 (-0x1.921fb6p+0f)
#define NegPio2_2 (0x1.777a5cp-25f)
#define NegPio2_3 (0x1.ee59dap-50f)
/* Reduced from 0x1p20 to 0x1p17 to ensure 3.5ulps.  */
#define RangeVal (0x1p17f)
#define InvPio2 ((0x1.45f306p-1f))
#define Shift (0x1.8p+23f)
#define AbsMask (0x7fffffff)
#define Pio4 (0x1.921fb6p-1)
/* 2PI * 2^-64.  */
#define Pio2p63 (0x1.921FB54442D18p-62)

#define P(i) __tanf_poly_data.poly_tan[i]
#define Q(i) __tanf_poly_data.poly_cotan[i]

static inline float
eval_P (float z)
{
  return PAIRWISE_HORNER_5 (z, z * z, P);
}

static inline float
eval_Q (float z)
{
  return PAIRWISE_HORNER_3 (z, z * z, Q);
}

/* Reduction of the input argument x using Cody-Waite approach, such that x = r
   + n * pi/2 with r lives in [-pi/4, pi/4] and n is a signed integer.  */
static inline float
reduce (float x, int32_t *in)
{
  /* n = rint(x/(pi/2)).  */
  float r = x;
  float q = fmaf (InvPio2, r, Shift);
  float n = q - Shift;
  /* There is no rounding here, n is representable by a signed integer.  */
  *in = (int32_t) n;
  /* r = x - n * (pi/2)  (range reduction into -pi/4 .. pi/4).  */
  r = fmaf (NegPio2_1, n, r);
  r = fmaf (NegPio2_2, n, r);
  r = fmaf (NegPio2_3, n, r);
  return r;
}

/* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
   only 8 new bits are added per entry, making the table 4 times larger.  */
static const uint32_t __inv_pio4[24]
  = {0x000000a2, 0x0000a2f9, 0x00a2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
     0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
     0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
     0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041};

/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
   XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
   Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
   Reduction uses a table of 4/PI with 192 bits of precision.  A 32x96->128 bit
   multiply computes the exact 2.62-bit fixed-point modulo.  Since the result
   can have at most 29 leading zeros after the binary point, the double
   precision result is accurate to 33 bits.  */
static inline double
reduce_large (uint32_t xi, int *np)
{
  const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
  int shift = (xi >> 23) & 7;
  uint64_t n, res0, res1, res2;

  xi = (xi & 0xffffff) | 0x800000;
  xi <<= shift;

  res0 = xi * arr[0];
  res1 = (uint64_t) xi * arr[4];
  res2 = (uint64_t) xi * arr[8];
  res0 = (res2 >> 32) | (res0 << 32);
  res0 += res1;

  n = (res0 + (1ULL << 61)) >> 62;
  res0 -= n << 62;
  double x = (int64_t) res0;
  *np = n;
  return x * Pio2p63;
}

/* Top 12 bits of the float representation with the sign bit cleared.  */
static inline uint32_t
top12 (float x)
{
  return (asuint (x) >> 20);
}

/* Fast single-precision tan implementation.
   Maximum ULP error: 3.293ulps.
   tanf(0x1.c849eap+16) got -0x1.fe8d98p-1 want -0x1.fe8d9ep-1.  */
float
tanf (float x)
{
  /* Get top words.  */
  uint32_t ix = asuint (x);
  uint32_t ia = ix & AbsMask;
  uint32_t ia12 = ia >> 20;

  /* Dispatch between no reduction (small numbers), fast reduction and
     slow large numbers reduction. The reduction step determines r float
     (|r| < pi/4) and n signed integer such that x = r + n * pi/2.  */
  int32_t n;
  float r;
  if (ia12 < top12 (Pio4))
    {
      /* Optimize small values.  */
      if (unlikely (ia12 < top12 (0x1p-12f)))
	{
	  if (unlikely (ia12 < top12 (0x1p-126f)))
	    /* Force underflow for tiny x.  */
	    force_eval_float (x * x);
	  return x;
	}

      /* tan (x) ~= x + x^3 * P(x^2).  */
      float x2 = x * x;
      float y = eval_P (x2);
      return fmaf (x2, x * y, x);
    }
  /* Similar to other trigonometric routines, fast inaccurate reduction is
     performed for values of x from pi/4 up to RangeVal. In order to keep errors
     below 3.5ulps, we set the value of RangeVal to 2^17. This might differ for
     other trigonometric routines. Above this value more advanced but slower
     reduction techniques need to be implemented to reach a similar accuracy.
  */
  else if (ia12 < top12 (RangeVal))
    {
      /* Fast inaccurate reduction.  */
      r = reduce (x, &n);
    }
  else if (ia12 < 0x7f8)
    {
      /* Slow accurate reduction.  */
      uint32_t sign = ix & ~AbsMask;
      double dar = reduce_large (ia, &n);
      float ar = (float) dar;
      r = asfloat (asuint (ar) ^ sign);
    }
  else
    {
      /* tan(Inf or NaN) is NaN.  */
      return __math_invalidf (x);
    }

  /* If x lives in an interval where |tan(x)|
     - is finite then use an approximation of tangent in the form
       tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
     - grows to infinity then use an approximation of cotangent in the form
       cotan(z) ~ 1/z + z * Q(z^2), where the reciprocal can be computed early.
       Using symmetries of tangent and the identity tan(r) = cotan(pi/2 - r),
       we only need to change the sign of r to obtain tan(x) from cotan(r).
     This 2-interval approach requires 2 different sets of coefficients P and
     Q, where Q is a lower order polynomial than P.  */

  /* Determine if x lives in an interval where |tan(x)| grows to infinity.  */
  uint32_t alt = (uint32_t) n & 1;

  /* Perform additional reduction if required.  */
  float z = alt ? -r : r;

  /* Prepare backward transformation.  */
  float z2 = r * r;
  float offset = alt ? 1.0f / z : z;
  float scale = alt ? z : z * z2;

  /* Evaluate polynomial approximation of tan or cotan.  */
  float p = alt ? eval_Q (z2) : eval_P (z2);

  /* A unified way of assembling the result on both interval types.  */
  return fmaf (scale, p, offset);
}

PL_SIG (S, F, 1, tan, -3.1, 3.1)
PL_TEST_ULP (tanf, 2.80)
PL_TEST_INTERVAL (tanf, 0, 0xffff0000, 10000)
PL_TEST_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000)
PL_TEST_INTERVAL (tanf, -0x1p-127, -0x1p-14, 50000)
PL_TEST_INTERVAL (tanf, 0x1p-14, 0.7, 50000)
PL_TEST_INTERVAL (tanf, -0x1p-14, -0.7, 50000)
PL_TEST_INTERVAL (tanf, 0.7, 1.5, 50000)
PL_TEST_INTERVAL (tanf, -0.7, -1.5, 50000)
PL_TEST_INTERVAL (tanf, 1.5, 0x1p17, 50000)
PL_TEST_INTERVAL (tanf, -1.5, -0x1p17, 50000)
PL_TEST_INTERVAL (tanf, 0x1p17, 0x1p54, 50000)
PL_TEST_INTERVAL (tanf, -0x1p17, -0x1p54, 50000)
PL_TEST_INTERVAL (tanf, 0x1p54, inf, 50000)
PL_TEST_INTERVAL (tanf, -0x1p54, -inf, 50000)