1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/*
* Single-precision vector log2 function.
*
* Copyright (c) 2022, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include "v_math.h"
#include "math_config.h"
#include "pl_sig.h"
#include "pl_test.h"
#if V_SUPPORTED
#define N (1 << V_LOG2F_TABLE_BITS)
#define T __v_log2f_data.tab
#define A __v_log2f_data.poly
#define OFF 0x3f330000
#define SubnormLim 0x800000
#define One v_u32 (0x3f800000)
static float
handle_special (float x)
{
if (x != x)
/* NaN - return NaN but do not trigger invalid. */
return x;
if (x < 0)
/* log2f(-anything) = NaN. */
return __math_invalidf (x);
if (x == 0)
/* log2f(0) = Inf. */
return __math_divzerof (1);
/* log2f(Inf) = Inf. */
return x;
}
static float
normalise (float x)
{
return asfloat (asuint (x * 0x1p23f) - (23 << 23));
}
#ifdef SCALAR
#define DEFINE_LOOKUP_FUNC(p) \
static inline float lookup_##p (uint32_t i) { return T[i].p; }
#else
#define DEFINE_LOOKUP_FUNC(p) \
static inline v_f32_t lookup_##p (v_u32_t i) \
{ \
return (v_f32_t){T[i[0]].p, T[i[1]].p, T[i[2]].p, T[i[3]].p}; \
}
#endif
DEFINE_LOOKUP_FUNC (invc_lo)
DEFINE_LOOKUP_FUNC (invc_hi)
DEFINE_LOOKUP_FUNC (logc)
/* Single-precision vector log2 routine. Implements the same algorithms as
scalar log2f, but using only single-precision arithmetic, with invc
represented as a two-limb float. Accurate to 2.6 ulp. The maximum error is
near sqrt(2):
__v_log2f(0x1.6a0484p+0) got 0x1.ffea02p-2
want 0x1.ffea08p-2. */
VPCS_ATTR v_f32_t V_NAME (log2f) (v_f32_t x)
{
v_u32_t ix = v_as_u32_f32 (x);
/* x is +-Inf, +-NaN, 0 or -ve. */
v_u32_t special = v_cond_u32 (ix >= 0x7f800000) | v_cond_u32 (ix == 0);
/* |x| < 2^126 (i.e. x is subnormal). */
v_u32_t subnorm = v_cond_u32 (ix < SubnormLim);
/* Sidestep special lanes so they do not inadvertently trigger fenv
exceptions. They will be fixed up later. */
if (unlikely (v_any_u32 (special)))
ix = v_sel_u32 (special, One, ix);
if (unlikely (v_any_u32 (subnorm)))
{
/* Normalize any subnormals. */
v_f32_t tmp_x = v_as_f32_u32 (ix);
ix = v_as_u32_f32 (v_call_f32 (normalise, tmp_x, tmp_x, subnorm));
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
v_u32_t tmp = ix - OFF;
v_u32_t i = (tmp >> (23 - V_LOG2F_TABLE_BITS)) % N;
v_u32_t top = tmp & 0xff800000;
v_u32_t iz = ix - top;
v_f32_t k = v_to_f32_s32 (v_as_s32_u32 (tmp) >> 23); /* Arithmetic shift. */
v_f32_t z = v_as_f32_u32 (iz);
v_f32_t invc_lo = lookup_invc_lo (i);
v_f32_t invc_hi = lookup_invc_hi (i);
v_f32_t logc = lookup_logc (i);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */
v_f32_t r = v_fma_f32 (z, invc_hi, v_f32 (-1));
r = v_fma_f32 (z, invc_lo, r);
v_f32_t y0 = logc + k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
v_f32_t r2 = r * r;
v_f32_t y = v_fma_f32 (v_f32 (A[1]), r, v_f32 (A[2]));
y = v_fma_f32 (v_f32 (A[0]), r2, y);
v_f32_t p = v_fma_f32 (v_f32 (A[3]), r, y0);
y = v_fma_f32 (y, r2, p);
if (unlikely (v_any_u32 (special)))
return v_call_f32 (handle_special, x, y, special);
return y;
}
VPCS_ALIAS
PL_SIG (V, F, 1, log2, 0.01, 11.1)
PL_TEST_ULP (V_NAME (log2f), 2.10)
PL_TEST_EXPECT_FENV (V_NAME (log2f), WANT_ERRNO)
#endif
|