diff options
author | Elliott Hughes <enh@google.com> | 2018-10-29 15:58:24 -0700 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2018-10-29 15:58:24 -0700 |
commit | e5f6e157cee4e12b2ef08ffd178e84fa955b1220 (patch) | |
tree | a1998cb9f24af3c314114d4daf50b0e625f09386 | |
parent | 745c75bfff7c74033ba5948ad91b3efbe069cbc0 (diff) | |
parent | f1a22e7d49fb48bf61e2416e8ba4fece1db6c0e6 (diff) | |
download | arm-optimized-routines-e5f6e157cee4e12b2ef08ffd178e84fa955b1220.tar.gz |
Merge remote-tracking branch 'aosp/upstream-master' into HEAD
am: f1a22e7d49
Change-Id: I935353d51d2affd9d83920f6a46e16be6f735c7f
-rw-r--r-- | METADATA | 13 | ||||
-rwxr-xr-x | auxiliary/remez.jl | 117 | ||||
-rw-r--r-- | math/cosf.c | 7 | ||||
-rw-r--r-- | math/log2_data.c | 26 | ||||
-rw-r--r-- | math/log_data.c | 26 | ||||
-rw-r--r-- | math/math_config.h | 2 | ||||
-rw-r--r-- | math/pow.c | 3 | ||||
-rw-r--r-- | math/pow_log_data.c | 22 | ||||
-rw-r--r-- | math/powf.c | 3 | ||||
-rw-r--r-- | math/sincosf.c | 7 | ||||
-rw-r--r-- | math/sincosf.h | 26 | ||||
-rw-r--r-- | math/sincosf_data.c | 4 | ||||
-rw-r--r-- | math/sinf.c | 7 |
13 files changed, 186 insertions, 77 deletions
@@ -1,8 +1,5 @@ name: "ARM-software/optimized-routines" -description: - "Optimized implementations of various library functions for ARM " - "architecture processors " - +description: "Optimized implementations of various library functions for ARM architecture processors " third_party { url { type: HOMEPAGE @@ -12,7 +9,11 @@ third_party { type: GIT value: "https://github.com/ARM-software/optimized-routines.git" } - version: "41ed0e600fb5a37ba96742fdbadd3048e62d5045" - last_upgrade_date { year: 2018 month: 7 day: 27 } + version: "e875f40f0b2ad71c5381a431e6d71829770c7ab7" license_type: NOTICE + last_upgrade_date { + year: 2018 + month: 10 + day: 29 + } } diff --git a/auxiliary/remez.jl b/auxiliary/remez.jl index 31266e9..01e62f9 100755 --- a/auxiliary/remez.jl +++ b/auxiliary/remez.jl @@ -18,6 +18,30 @@ # See the License for the specific language governing permissions and # limitations under the License. +import Base.\ + +# ---------------------------------------------------------------------- +# Helper functions to cope with different Julia versions. +if VERSION >= v"0.7.0" + array1d(T, d) = Array{T, 1}(undef, d) + array2d(T, d1, d2) = Array{T, 2}(undef, d1, d2) +else + array1d(T, d) = Array(T, d) + array2d(T, d1, d2) = Array(T, d1, d2) +end +if VERSION < v"0.5.0" + String = ASCIIString +end +if VERSION >= v"0.6.0" + # Use Base.invokelatest to run functions made using eval(), to + # avoid "world age" error + run(f, x...) = Base.invokelatest(f, x...) +else + # Prior to 0.6.0, invokelatest doesn't exist (but fortunately the + # world age problem also doesn't seem to exist) + run(f, x...) = f(x...) +end + # ---------------------------------------------------------------------- # Global variables configured by command-line options. floatsuffix = "" # adjusted by --floatsuffix @@ -26,7 +50,7 @@ epsbits = 256 # adjusted by --bits debug_facilities = Set() # adjusted by --debug full_output = false # adjusted by --full array_format = false # adjusted by --array -preliminary_commands = String[] # adjusted by --pre +preliminary_commands = array1d(String, 0) # adjusted by --pre # ---------------------------------------------------------------------- # Diagnostic and utility functions. @@ -56,7 +80,7 @@ macro debug(facility, printargs...) for arg in printargs printit = quote $printit - print($arg) + print($(esc(arg))) end end return quote @@ -156,7 +180,7 @@ end # xys Array of (x,y) pairs of BigFloats. # # Return value is a string. -function format_xylist(xys::Array{(BigFloat,BigFloat)}) +function format_xylist(xys::Array{Tuple{BigFloat,BigFloat}}) return ("[\n" * join([" "*string(x)*" -> "*string(y) for (x,y) in xys], "\n") * "\n]") @@ -354,8 +378,8 @@ function ratfn_leastsquares(f::Function, xvals::Array{BigFloat}, n, d, # Build the matrix. We're solving n+d+1 equations in n+d+1 # unknowns. (We actually have to return n+d+2 coefficients, but # one of them is hardwired to 1.) - matrix = Array(BigFloat, n+d+1, n+d+1) - vector = Array(BigFloat, n+d+1) + matrix = array2d(BigFloat, n+d+1, n+d+1) + vector = array1d(BigFloat, n+d+1) for i = 0:1:n # Equation obtained by differentiating with respect to p_i, # i.e. the numerator coefficient of x^i. @@ -484,7 +508,7 @@ end # the extremum location and its value (i.e. y=f(x)). function find_extrema(f::Function, grid::Array{BigFloat}) len = length(grid) - extrema = Array((BigFloat, BigFloat), 0) + extrema = array1d(Tuple{BigFloat, BigFloat}, 0) for i = 1:1:len # We have to provide goldensection() with three points # bracketing the extremum. If the extremum is at one end of @@ -529,11 +553,11 @@ end # Returns a new array of (x,y) pairs which is a subsequence of the # original sequence. (So, in particular, if the input was sorted by x # then so will the output be.) -function winnow_extrema(extrema::Array{(BigFloat,BigFloat)}, n) +function winnow_extrema(extrema::Array{Tuple{BigFloat,BigFloat}}, n) # best[i,j] gives the best sequence so far of length i and with # sign j (where signs are coded as 1=positive, 2=negative), in the # form of a tuple (cost, actual array of x,y pairs). - best = fill((BigFloat(0), Array((BigFloat,BigFloat),0)), n, 2) + best = fill((BigFloat(0), array1d(Tuple{BigFloat,BigFloat}, 0)), n, 2) for (x,y) = extrema if y > 0 @@ -624,8 +648,8 @@ function ratfn_equal_deviation(f::Function, coords::Array{BigFloat}, # in which the p_i and e are the variables, and the powers of # x and calls to w and f are the coefficients. - matrix = Array(BigFloat, n+2, n+2) - vector = Array(BigFloat, n+2) + matrix = array2d(BigFloat, n+2, n+2) + vector = array1d(BigFloat, n+2) currsign = +1 for i = 1:1:n+2 x = coords[i] @@ -678,8 +702,8 @@ function ratfn_equal_deviation(f::Function, coords::Array{BigFloat}, # based on the first n+d+1 of the n+d+2 coordinates, and # see what the error turns out to be at the final # coordinate. - matrix = Array(BigFloat, n+d+1, n+d+1) - vector = Array(BigFloat, n+d+1) + matrix = array2d(BigFloat, n+d+1, n+d+1) + vector = array1d(BigFloat, n+d+1) currsign = +1 for i = 1:1:n+d+1 x = coords[i] @@ -787,7 +811,7 @@ end # means that the Chebyshev alternation theorem guarantees # that any other function of the same degree must exceed # the error of this one at at least one of those points. -function ratfn_minimax(f::Function, interval::(BigFloat,BigFloat), n, d, +function ratfn_minimax(f::Function, interval::Tuple{BigFloat,BigFloat}, n, d, w = (x,y)->BigFloat(1)) # We start off by finding a least-squares approximation. This # doesn't need to be perfect, but if we can get it reasonably good @@ -934,7 +958,7 @@ function test() # Test leastsquares rational functions. println("Leastsquares test 1:") n = 10000 - a = Array(BigFloat, n+1) + a = array1d(BigFloat, n+1) for i = 0:1:n a[1+i] = i/BigFloat(n) end @@ -947,7 +971,7 @@ function test() # Test golden section search. println("Golden section test 1:") x, y = goldensection(x->sin(x), - BigFloat(0), BigFloat("0.1"), BigFloat(4)) + BigFloat(0), BigFloat(1)/10, BigFloat(4)) println(" ", x, " -> ", y) test(approx_eq(x, asin(BigFloat(1)))) test(approx_eq(y, 1)) @@ -955,7 +979,7 @@ function test() # Test extrema-winnowing algorithm. println("Winnow test 1:") extrema = [(x, sin(20*x)*sin(197*x)) - for x in BigFloat(0):BigFloat("0.001"):BigFloat(1)] + for x in BigFloat(0):BigFloat(1)/1000:BigFloat(1)] winnowed = winnow_extrema(extrema, 12) println(" ret = ", format_xylist(winnowed)) prevx, prevy = -1, 0 @@ -973,7 +997,7 @@ function test() println(" ",e) println(" ",ratfn_to_string(nc, dc)) test(0 < e < 1e-3) - for x = 0:BigFloat(0.001):1 + for x = 0:BigFloat(1)/1000:1 test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001) end @@ -982,7 +1006,7 @@ function test() println(" ",e) println(" ",ratfn_to_string(nc, dc)) test(0 < e < 1e-3) - for x = 0:BigFloat(0.001):1 + for x = 0:BigFloat(1)/1000:1 test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001) end @@ -992,7 +1016,7 @@ function test() println(" ",e) println(" ",ratfn_to_string(nc, dc)) test(0 < e < 1e-3) - for x = 0:BigFloat(0.001):1 + for x = 0:BigFloat(1)/1000:1 test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001) end @@ -1002,7 +1026,7 @@ function test() println(" ",e) println(" ",ratfn_to_string(nc, dc)) test(0 < e < 1e-3) - for x = 0:BigFloat(0.001):1 + for x = 0:BigFloat(1)/1000:1 test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001) end @@ -1012,12 +1036,12 @@ function test() println(" ",e) println(" ",ratfn_to_string(nc, dc)) test(0 < e < 1e-3) - for x = 0:BigFloat(0.001):1 + for x = 0:BigFloat(1)/1000:1 test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001) end total = passes + fails - @printf "%d passes %d fails %d total\n" passes fails total + println(passes, " passes ", fails, " fails ", total, " total") end # ---------------------------------------------------------------------- @@ -1206,14 +1230,14 @@ function main() end for preliminary_command in preliminary_commands - eval(parse(preliminary_command)) + eval(Meta.parse(preliminary_command)) end - lo = BigFloat(eval(parse(argwords[1]))) - hi = BigFloat(eval(parse(argwords[2]))) - n = int(argwords[3]) - d = int(argwords[4]) - f = eval(parse("x -> " * argwords[5])) + lo = BigFloat(eval(Meta.parse(argwords[1]))) + hi = BigFloat(eval(Meta.parse(argwords[2]))) + n = parse(Int,argwords[3]) + d = parse(Int,argwords[4]) + f = eval(Meta.parse("x -> " * argwords[5])) # Wrap the user-provided function with a function of our own. This # arranges to detect silly FP values (inf,nan) early and diagnose @@ -1221,7 +1245,7 @@ function main() # function in case you suspect it's doing the wrong thing at some # special-case point. function func(x) - y = f(x) + y = run(f,x) @debug("f", x, " -> ", y) if !isfinite(y) error("f(" * string(x) * ") returned non-finite value " * string(y)) @@ -1231,9 +1255,9 @@ function main() if nargs == 6 # Wrap the user-provided weight function similarly. - w = eval(parse("(x,y) -> " * argwords[6])) + w = eval(Meta.parse("(x,y) -> " * argwords[6])) function wrapped_weight(x,y) - ww = w(x,y) + ww = run(w,x,y) if !isfinite(ww) error("w(" * string(x) * "," * string(y) * ") returned non-finite value " * string(ww)) @@ -1283,9 +1307,11 @@ end what_to_do = main doing_opts = true -argwords = Array(ASCIIString, 0) +argwords = array1d(String, 0) for arg = ARGS - if doing_opts && beginswith(arg, "-") + global doing_opts, what_to_do, argwords + global full_output, array_format, xvarname, floatsuffix, epsbits + if doing_opts && startswith(arg, "-") if arg == "--" doing_opts = false elseif arg == "--help" @@ -1296,18 +1322,19 @@ for arg = ARGS full_output = true elseif arg == "--array" array_format = true - elseif beginswith(arg, "--debug=") - enable_debug(arg[length("--debug=")+1:]) - elseif beginswith(arg, "--variable=") - xvarname = arg[length("--variable=")+1:] - elseif beginswith(arg, "--suffix=") - floatsuffix = arg[length("--suffix=")+1:] - elseif beginswith(arg, "--bits=") - epsbits = int(arg[length("--bits=")+1:]) - elseif beginswith(arg, "--bigfloatbits=") - set_bigfloat_precision(int(arg[length("--bigfloatbits=")+1:])) - elseif beginswith(arg, "--pre=") - push!(preliminary_commands, arg[length("--pre=")+1:]) + elseif startswith(arg, "--debug=") + enable_debug(arg[length("--debug=")+1:end]) + elseif startswith(arg, "--variable=") + xvarname = arg[length("--variable=")+1:end] + elseif startswith(arg, "--suffix=") + floatsuffix = arg[length("--suffix=")+1:end] + elseif startswith(arg, "--bits=") + epsbits = parse(Int,arg[length("--bits=")+1:end]) + elseif startswith(arg, "--bigfloatbits=") + set_bigfloat_precision( + parse(Int,arg[length("--bigfloatbits=")+1:end])) + elseif startswith(arg, "--pre=") + push!(preliminary_commands, arg[length("--pre=")+1:end]) else error("unrecognised option: ", arg) end diff --git a/math/cosf.c b/math/cosf.c index 7d57d06..5a635d1 100644 --- a/math/cosf.c +++ b/math/cosf.c @@ -26,11 +26,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast cosf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ float cosf (float y) { diff --git a/math/log2_data.c b/math/log2_data.c index 356662f..d2eba89 100644 --- a/math/log2_data.c +++ b/math/log2_data.c @@ -54,6 +54,32 @@ const struct log2_data __log2_data = { 0x1.a6225e117f92ep-3, #endif }, +/* Algorithm: + + x = 2^k z + log2(x) = k + log2(c) + log2(z/c) + log2(z/c) = poly(z/c - 1) + +where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls +into the ith one, then table entries are computed as + + tab[i].invc = 1/c + tab[i].logc = (double)log2(c) + tab2[i].chi = (double)c + tab2[i].clo = (double)(c - (double)c) + +where c is near the center of the subinterval and is chosen by trying +-2^29 +floating point invc candidates around 1/center and selecting one for which + + 1) the rounding error in 0x1.8p10 + logc is 0, + 2) the rounding error in z - chi - clo is < 0x1p-64 and + 3) the rounding error in (double)log2(c) is minimized (< 0x1p-68). + +Note: 1) ensures that k + logc can be computed without rounding error, 2) +ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a +single rounding error when there is no fast fma for z*invc - 1, 3) ensures +that logc + poly(z/c - 1) has small error, however near x == 1 when +|log2(x)| < 0x1p-4, this is not enough so that is special cased. */ .tab = { #if N == 64 {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, diff --git a/math/log_data.c b/math/log_data.c index 88c8a2e..2e2e478 100644 --- a/math/log_data.c +++ b/math/log_data.c @@ -98,6 +98,32 @@ const struct log_data __log_data = { 0x1.2493c29331a5cp-3, #endif }, +/* Algorithm: + + x = 2^k z + log(x) = k ln2 + log(c) + log(z/c) + log(z/c) = poly(z/c - 1) + +where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls +into the ith one, then table entries are computed as + + tab[i].invc = 1/c + tab[i].logc = (double)log(c) + tab2[i].chi = (double)c + tab2[i].clo = (double)(c - (double)c) + +where c is near the center of the subinterval and is chosen by trying +-2^29 +floating point invc candidates around 1/center and selecting one for which + + 1) the rounding error in 0x1.8p9 + logc is 0, + 2) the rounding error in z - chi - clo is < 0x1p-66 and + 3) the rounding error in (double)log(c) is minimized (< 0x1p-66). + +Note: 1) ensures that k*ln2hi + logc can be computed without rounding error, +2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to +a single rounding error when there is no fast fma for z*invc - 1, 3) ensures +that logc + poly(z/c - 1) has small error, however near x == 1 when +|log(x)| < 0x1p-4, this is not enough so that is special cased. */ .tab = { #if N == 64 {0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2}, diff --git a/math/math_config.h b/math/math_config.h index ce6fac8..ddac41c 100644 --- a/math/math_config.h +++ b/math/math_config.h @@ -56,7 +56,7 @@ /* Compiler can inline fma as a single instruction. */ #ifndef HAVE_FAST_FMA -# ifdef FP_FAST_FMA +# if defined FP_FAST_FMA || __aarch64__ # define HAVE_FAST_FMA 1 # else # define HAVE_FAST_FMA 0 @@ -256,7 +256,8 @@ exp_inline (double x, double xtail, uint32_t sign_bias) return scale + scale * tmp; } -/* Returns 0 if not int, 1 if odd int, 2 if even int. */ +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint64_t iy) { diff --git a/math/pow_log_data.c b/math/pow_log_data.c index 1b73b7f..6d3bf5e 100644 --- a/math/pow_log_data.c +++ b/math/pow_log_data.c @@ -38,6 +38,28 @@ const struct pow_log_data __pow_log_data = { -0x1.0002b8b263fc3p-3 * -8, #endif }, +/* Algorithm: + + x = 2^k z + log(x) = k ln2 + log(c) + log(z/c) + log(z/c) = poly(z/c - 1) + +where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals +and z falls into the ith one, then table entries are computed as + + tab[i].invc = 1/c + tab[i].logc = round(0x1p43*log(c))/0x1p43 + tab[i].logctail = (double)(log(c) - logc) + +where c is chosen near the center of the subinterval such that 1/c has only a +few precision bits so z/c - 1 is exactly representible as double: + + 1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2 + +Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97, +the last few bits of logc are rounded away so k*ln2hi + logc has no rounding +error and the interval for z is selected such that near x == 1, where log(x) +is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */ .tab = { #if N == 128 #define A(a, b, c) {a, 0, b, c}, diff --git a/math/powf.c b/math/powf.c index 5ea1e71..c4ea178 100644 --- a/math/powf.c +++ b/math/powf.c @@ -121,7 +121,8 @@ exp2_inline (double_t xd, uint32_t sign_bias) return y; } -/* Returns 0 if not int, 1 if odd int, 2 if even int. */ +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. */ static inline int checkint (uint32_t iy) { diff --git a/math/sincosf.c b/math/sincosf.c index 5b13d72..1af41f3 100644 --- a/math/sincosf.c +++ b/math/sincosf.c @@ -26,11 +26,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast sincosf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ void sincosf (float y, float *sinp, float *cosp) { diff --git a/math/sincosf.h b/math/sincosf.h index 4aced58..395a30e 100644 --- a/math/sincosf.h +++ b/math/sincosf.h @@ -21,19 +21,25 @@ #include <math.h> #include "math_config.h" -/* PI * 2^-64. */ -static const double pi64 = 0x1.921FB54442D18p-62; +/* 2PI * 2^-64. */ +static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ static const double pio4 = 0x1.921FB54442D18p-1; +/* The constants and polynomials for sine and cosine. */ typedef struct { - double sign[4]; - double hpi_inv, hpi, c0, c1, c2, c3, c4, s1, s2, s3; + double sign[4]; /* Sign of sine in quadrants 0..3. */ + double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */ + double hpi; /* PI / 2. */ + double c0, c1, c2, c3, c4; /* Cosine polynomial. */ + double s1, s2, s3; /* Sine polynomial. */ } sincos_t; +/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */ extern const sincos_t __sincosf_table[2] HIDDEN; +/* Table with 4/PI to 192 bit precision. */ extern const uint32_t __inv_pio4[] HIDDEN; /* Top 12 bits of the float representation with the sign bit cleared. */ @@ -107,18 +113,20 @@ sinf_poly (double x, double x2, const sincos_t *p, int n) X as a value between -PI/4 and PI/4 and store the quadrant in NP. The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, - only 2 multiplies are required and the result is accurate for |X| <= 120.0. - Use round/lround if inlined, otherwise convert to int. To avoid inaccuracies - introduced by truncating negative values, compute the quadrant * 2^24. */ + the result is accurate for |X| <= 120.0. */ static inline double reduce_fast (double x, const sincos_t *p, int *np) { double r; #if TOINT_INTRINSICS + /* Use fast round and lround instructions when available. */ r = x * p->hpi_inv; *np = converttoint (r); return x - roundtoint (r) * p->hpi; #else + /* Use scaled float to int conversion with explicit rounding. + hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. + This avoids inaccuracies introduced by truncating negative values. */ r = x * p->hpi_inv; int n = ((int32_t)r + 0x800000) >> 24; *np = n; @@ -126,7 +134,7 @@ reduce_fast (double x, const sincos_t *p, int *np) #endif } -/* Reduce the range of XI to a multiple of PI/4 using fast integer arithmetic. +/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit @@ -153,5 +161,5 @@ reduce_large (uint32_t xi, int *np) res0 -= n << 62; double x = (int64_t)res0; *np = n; - return x * pi64; + return x * pi63; } diff --git a/math/sincosf_data.c b/math/sincosf_data.c index deaed1b..3e7e5cd 100644 --- a/math/sincosf_data.c +++ b/math/sincosf_data.c @@ -30,7 +30,7 @@ const sincos_t __sincosf_table[2] = { { { 1.0, -1.0, -1.0, 1.0 }, -#if HAVE_FAST_ROUND +#if TOINT_INTRINSICS 0x1.45F306DC9C883p-1, #else 0x1.45F306DC9C883p+23, @@ -47,7 +47,7 @@ const sincos_t __sincosf_table[2] = }, { { 1.0, -1.0, -1.0, 1.0 }, -#if HAVE_FAST_ROUND +#if TOINT_INTRINSICS 0x1.45F306DC9C883p-1, #else 0x1.45F306DC9C883p+23, diff --git a/math/sinf.c b/math/sinf.c index 5369d50..3f35923 100644 --- a/math/sinf.c +++ b/math/sinf.c @@ -25,11 +25,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast sinf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ float sinf (float y) { |