diff options
author | Add joeram01 <joe.ramsay@arm.com> | 2022-10-31 12:23:43 +0000 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-11-09 14:51:22 +0000 |
commit | 43140a886fa8f0907ec8545d57534cc4343b8b9c (patch) | |
tree | 0497615f732d68d9ab2341462c45bcc4b52d05ff | |
parent | 78a876ad601673d07fe4600d7ebfcf46587d819d (diff) | |
download | arm-optimized-routines-43140a886fa8f0907ec8545d57534cc4343b8b9c.tar.gz |
pl/math: Add scalar and vector/Neon coshf
New routines use single-precision exp, which has been copied from
math/. Scalar is accurate to 1.9 ULP, Neon to 2.4 ULP.
Also use the new expf helper in scalar sinhf.
-rw-r--r-- | pl/math/coshf_1u9.c | 60 | ||||
-rw-r--r-- | pl/math/expf.c | 76 | ||||
-rw-r--r-- | pl/math/expf_data.c | 31 | ||||
-rw-r--r-- | pl/math/include/mathlib.h | 5 | ||||
-rw-r--r-- | pl/math/math_config.h | 9 | ||||
-rw-r--r-- | pl/math/s_coshf_2u4.c | 6 | ||||
-rw-r--r-- | pl/math/sinhf_2u3.c | 6 | ||||
-rw-r--r-- | pl/math/test/mathbench_funcs.h | 6 | ||||
-rwxr-xr-x | pl/math/test/runulp.sh | 22 | ||||
-rw-r--r-- | pl/math/test/testcases/directed/coshf.tst | 15 | ||||
-rw-r--r-- | pl/math/test/ulp_funcs.h | 4 | ||||
-rw-r--r-- | pl/math/test/ulp_wrappers.h | 2 | ||||
-rw-r--r-- | pl/math/v_coshf_2u4.c | 62 | ||||
-rw-r--r-- | pl/math/vn_coshf_2u4.c | 12 |
14 files changed, 313 insertions, 3 deletions
diff --git a/pl/math/coshf_1u9.c b/pl/math/coshf_1u9.c new file mode 100644 index 0000000..ca3f767 --- /dev/null +++ b/pl/math/coshf_1u9.c @@ -0,0 +1,60 @@ +/* + * Single-precision cosh(x) function. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define AbsMask 0x7fffffff +#define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */ +#define SpecialBound \ + 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \ + special case. */ + +float +optr_aor_exp_f32 (float); + +static NOINLINE float +specialcase (float x, uint32_t iax) +{ + if (iax == 0x7f800000) + return INFINITY; + if (iax > 0x7f800000) + return __math_invalidf (x); + if (iax <= TinyBound) + /* For tiny x, avoid underflow by just returning 1. */ + return 1; + /* Otherwise SpecialBound <= |x| < Inf. x is too large to calculate exp(x) + without overflow, so use exp(|x|/2) instead. For large x cosh(x) is + dominated by exp(x), so return: + cosh(x) ~= (exp(|x|/2))^2 / 2. */ + float t = optr_aor_exp_f32 (asfloat (iax) / 2); + return (0.5 * t) * t; +} + +/* Approximation for single-precision cosh(x) using exp. + cosh(x) = (exp(x) + exp(-x)) / 2. + The maximum error is 1.89 ULP, observed for |x| > SpecialBound: + coshf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. + The maximum error observed for TinyBound < |x| < SpecialBound is 1.02 ULP: + coshf(0x1.50a3cp+0) got 0x1.ff21dcp+0 want 0x1.ff21dap+0. */ +float +coshf (float x) +{ + uint32_t ix = asuint (x); + uint32_t iax = ix & AbsMask; + float ax = asfloat (iax); + + if (unlikely (iax <= TinyBound || iax >= SpecialBound)) + { + /* x is tiny, large or special. */ + return specialcase (x, iax); + } + + /* Compute cosh using the definition: + coshf(x) = exp(x) / 2 + exp(-x) / 2. 
*/ + float t = optr_aor_exp_f32 (ax); + return 0.5f * t + 0.5f / t; +} diff --git a/pl/math/expf.c b/pl/math/expf.c new file mode 100644 index 0000000..fa03b05 --- /dev/null +++ b/pl/math/expf.c @@ -0,0 +1,76 @@ +/* + * Single-precision e^x function. + * + * Copyright (c) 2017-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include <math.h> +#include <stdint.h> +#include "math_config.h" + +/* +EXPF_TABLE_BITS = 5 +EXPF_POLY_ORDER = 3 + +ULP error: 0.502 (nearest rounding.) +Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) +Wrong count: 170635 (all nearest rounding wrong results with fma.) +Non-nearest ULP error: 1 (rounded ULP error) +*/ + +#define N (1 << EXPF_TABLE_BITS) +#define InvLn2N __expf_data.invln2_scaled +#define T __expf_data.tab +#define C __expf_data.poly_scaled + +static inline uint32_t +top12 (float x) +{ + return asuint (x) >> 20; +} + +float +optr_aor_exp_f32 (float x) +{ + uint32_t abstop; + uint64_t ki, t; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, xd, z, r, r2, y, s; + + xd = (double_t) x; + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop >= top12 (88.0f))) + { + /* |x| >= 88 or x is nan. */ + if (asuint (x) == asuint (-INFINITY)) + return 0.0f; + if (abstop >= top12 (INFINITY)) + return x + x; + if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ + return __math_oflowf (0); + if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ + return __math_uflowf (0); + } + + /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ + z = InvLn2N * xd; + + /* Round and convert z to int, the result is in [-150*N, 128*N] and + ideally nearest int is used, otherwise the magnitude of r can be + bigger which gives larger approximation error. 
+ kd = roundtoint (z); + ki = converttoint (z); + r = z - kd; + + /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + t += ki << (52 - EXPF_TABLE_BITS); + s = asdouble (t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float (y); +} diff --git a/pl/math/expf_data.c b/pl/math/expf_data.c new file mode 100644 index 0000000..1525fcc --- /dev/null +++ b/pl/math/expf_data.c @@ -0,0 +1,31 @@ +/* + * Coeffs and table entries for single-precision exp. Copied from + * math/exp2f_data.c, with EXP2F_TABLE_BITS == 5 (N == 32 table entries). + * + * Copyright (c) 2017-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define N (1 << EXPF_TABLE_BITS) + +const struct expf_data __expf_data = { + /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) + used for computing 2^(k/N) for an int |k| < 150 N as + double(tab[k%N] + (k << 52-BITS)) */ + .tab = { +0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, +0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, +0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, +0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, +0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, +0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, +0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, +0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, + }, + .invln2_scaled = 0x1.71547652b82fep+0 * N, + .poly_scaled = { + 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, + }, +}; diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index e99b8eb..55223a3 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -12,6 +12,7 @@ float acoshf (float); float 
asinhf (float); float atan2f (float, float); +float coshf (float); float erfcf (float); float erff (float); float expm1f (float); @@ -30,6 +31,7 @@ double log1p (double); float __s_asinhf (float); float __s_atanf (float); float __s_atan2f (float, float); +float __s_coshf (float); float __s_erfcf (float); float __s_erff (float); float __s_expm1f (float); @@ -64,6 +66,7 @@ __f32x4_t __v_atanf (__f32x4_t); __f64x2_t __v_atan (__f64x2_t); __f32x4_t __v_atan2f (__f32x4_t, __f32x4_t); __f64x2_t __v_atan2 (__f64x2_t, __f64x2_t); +__f32x4_t __v_coshf (__f32x4_t); __f32x4_t __v_erff (__f32x4_t); __f64x2_t __v_erf (__f64x2_t); __f32x4_t __v_erfcf (__f32x4_t); @@ -87,6 +90,7 @@ __vpcs __f32x4_t __vn_atanf (__f32x4_t); __vpcs __f64x2_t __vn_atan (__f64x2_t); __vpcs __f32x4_t __vn_atan2f (__f32x4_t, __f32x4_t); __vpcs __f64x2_t __vn_atan2 (__f64x2_t, __f64x2_t); +__vpcs __f32x4_t __vn_coshf (__f32x4_t); __vpcs __f32x4_t __vn_erff (__f32x4_t); __vpcs __f64x2_t __vn_erf (__f64x2_t); __vpcs __f32x4_t __vn_erfcf (__f32x4_t); @@ -107,6 +111,7 @@ __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t); __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t); __vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t); __vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t); +__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t); __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t); diff --git a/pl/math/math_config.h b/pl/math/math_config.h index 2d39e91..dc660f1 100644 --- a/pl/math/math_config.h +++ b/pl/math/math_config.h @@ -541,4 +541,13 @@ extern const float __sv_expf_poly[SV_EXPF_POLY_ORDER - 1] HIDDEN; #define EXPM1F_POLY_ORDER 5 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN; +#define EXPF_TABLE_BITS 5 +#define EXPF_POLY_ORDER 3 +extern const struct expf_data +{ + uint64_t tab[1 << EXPF_TABLE_BITS]; + double invln2_scaled; + double poly_scaled[EXPF_POLY_ORDER]; +} __expf_data HIDDEN; + #endif diff --git 
a/pl/math/s_coshf_2u4.c b/pl/math/s_coshf_2u4.c new file mode 100644 index 0000000..1b7091b --- /dev/null +++ b/pl/math/s_coshf_2u4.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#define SCALAR 1 +#include "v_coshf_2u4.c" diff --git a/pl/math/sinhf_2u3.c b/pl/math/sinhf_2u3.c index a0459ca..c616dac 100644 --- a/pl/math/sinhf_2u3.c +++ b/pl/math/sinhf_2u3.c @@ -16,8 +16,8 @@ 0x42b2d4fd /* 0x1.65a9fap+6, minimum positive value for which sinhf should \ overflow. */ -double -__exp_dd (double, double); +float +optr_aor_exp_f32 (float); /* Approximation for single-precision sinh(x) using expm1. sinh(x) = (exp(x) - exp(-x)) / 2. @@ -52,7 +52,7 @@ sinhf (float x) ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. Greatest error in this region is 1.89 ULP: sinhf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. */ - float e = __exp_dd (ax / 2, 0); + float e = optr_aor_exp_f32 (ax / 2); return (e * halfsign) * e; } diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h index ee4440d..544a7d1 100644 --- a/pl/math/test/mathbench_funcs.h +++ b/pl/math/test/mathbench_funcs.h @@ -10,6 +10,7 @@ F (asinhf, -10.0, 10.0) F (atanf, -10.0, 10.0) {"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}}, F (cosf, -3.1, 3.1) +F (coshf, -10.0, 10.0) F (erfcf, -4.0, 10.0) F (erff, -4.0, 4.0) F (expm1f, -9.9, 9.9) @@ -39,6 +40,7 @@ F (__s_atanf, -10.0, 10.0) D (__s_atan, -10.0, 10.0) {"__s_atan2f", 'f', 0, -10.0, 10.0, {.f = __s_atan2f_wrap}}, {"__s_atan2", 'd', 0, -10.0, 10.0, {.d = __s_atan2_wrap}}, +F (__s_coshf, -10.0, 10.0) F (__s_erff, -4.0, 4.0) D (__s_erf, -6.0, 6.0) F (__s_erfcf, -6.0, 28.0) @@ -58,6 +60,7 @@ VF (__v_atanf, -10.0, 10.0) VD (__v_atan, -10.0, 10.0) {"__v_atan2f", 'f', 'v', -10.0, 10.0, {.vf = __v_atan2f_wrap}}, {"__v_atan2", 'd', 'v', -10.0, 10.0, {.vd = __v_atan2_wrap}}, +VF (__v_coshf, -10.0, 10.0) VF (__v_erff, -4.0, 4.0) VD (__v_erf, -6.0, 6.0) VF (__v_erfcf, -6.0, 28.0) 
@@ -87,6 +90,9 @@ VND (_ZGVnN2v_atan, -10.0, 10.0) {"__vn_atan2", 'd', 'n', -10.0, 10.0, {.vnd = __vn_atan2_wrap}}, {"_ZGVnN2vv_atan2", 'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}}, +VNF (__vn_coshf, -10.0, 10.0) +VNF (_ZGVnN4v_coshf, -10.0, 10.0) + VNF (__vn_erff, -4.0, 4.0) VNF (_ZGVnN4v_erff, -4.0, 4.0) diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh index 1690d5f..d30e707 100755 --- a/pl/math/test/runulp.sh +++ b/pl/math/test/runulp.sh @@ -171,6 +171,14 @@ t sinhf -0x1.62e43p+6 -0x1.65a9fap+6 100 t sinhf 0x1.65a9fap+6 inf 100 t sinhf -0x1.65a9fap+6 -inf 100 +L=1.89 +t coshf 0 0x1p-63 100 +t coshf 0 0x1.5a92d8p+6 80000 +t coshf 0x1.5a92d8p+6 inf 2000 +t coshf -0 -0x1p-63 100 +t coshf -0 -0x1.5a92d8p+6 80000 +t coshf -0x1.5a92d8p+6 -inf 2000 + done # vector functions @@ -358,6 +366,15 @@ range_sinhf=' -0x1.65a9fap+6 -inf 100 ' +range_coshf=' + 0 0x1p-63 100 + 0 0x1.5a92d8p+6 80000 + 0x1.5a92d8p+6 inf 2000 + -0 -0x1p-63 100 + -0 -0x1.5a92d8p+6 80000 + -0x1.5a92d8p+6 -inf 2000 +' + range_sve_cosf=' 0 0xffff0000 10000 0x1p-4 0x1p4 500000 @@ -519,6 +536,7 @@ L_tanf=2.7 L_log1p=1.97 L_expm1f=1.02 L_sinhf=1.76 +L_coshf=1.89 L_sve_cosf=1.57 L_sve_cos=1.61 @@ -646,6 +664,10 @@ sinhf __s_sinhf $runs fenv sinhf __v_sinhf $runv fenv sinhf __vn_sinhf $runvn fenv sinhf _ZGVnN4v_sinhf $runvn fenv +coshf __s_coshf $runs fenv +coshf __v_coshf $runv fenv +coshf __vn_coshf $runvn fenv +coshf _ZGVnN4v_coshf $runvn fenv sve_cosf __sv_cosf $runsv sve_cosf _ZGVsMxv_cosf $runsv diff --git a/pl/math/test/testcases/directed/coshf.tst b/pl/math/test/testcases/directed/coshf.tst new file mode 100644 index 0000000..cdc1d8d --- /dev/null +++ b/pl/math/test/testcases/directed/coshf.tst @@ -0,0 +1,15 @@ +; coshf.tst +; +; Copyright (c) 2007-2022, Arm Limited. 
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=coshf op1=7fc00001 result=7fc00001 errno=0 +func=coshf op1=ffc00001 result=7fc00001 errno=0 +func=coshf op1=7f800001 result=7fc00001 errno=0 status=i +func=coshf op1=ff800001 result=7fc00001 errno=0 status=i +func=coshf op1=7f800000 result=7f800000 errno=0 +func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox +func=coshf op1=ff800000 result=7f800000 errno=0 +func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox +func=coshf op1=00000000 result=3f800000 errno=0 +func=coshf op1=80000000 result=3f800000 errno=0 diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h index ab5bcd6..ef7c7be 100644 --- a/pl/math/test/ulp_funcs.h +++ b/pl/math/test/ulp_funcs.h @@ -7,6 +7,7 @@ F1 (acosh) F1 (asinh) F2 (atan2) +F1 (cosh) F1 (erfc) F1 (erf) F1 (expm1) @@ -26,6 +27,7 @@ SF1 (atan) SD1 (atan) SF2 (atan2) SD2 (atan2) +SF1 (cosh) SF1 (erf) SD1 (erf) SF1 (erfc) @@ -45,6 +47,7 @@ VF1 (atan) VD1 (atan) VF2 (atan2) VD2 (atan2) +VF1 (cosh) VF1 (erf) VD1 (erf) VF1 (erfc) @@ -64,6 +67,7 @@ ZVNF1 (atan) ZVND1 (atan) ZVNF2 (atan2) ZVND2 (atan2) +ZVNF1 (cosh) ZVNF1 (erf) ZVND1 (erf) ZVNF1 (erfc) diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h index 210b738..93cf75e 100644 --- a/pl/math/test/ulp_wrappers.h +++ b/pl/math/test/ulp_wrappers.h @@ -100,6 +100,7 @@ DECL_POW_INT_REF(ref_powi, long double, double, int) VF1_WRAP(asinh) VF1_WRAP(atan) VF2_WRAP(atan2) +VF1_WRAP(cosh) VF1_WRAP(erf) VF1_WRAP(erfc) VF1_WRAP(expm1) @@ -119,6 +120,7 @@ VD1_WRAP(log2) ZVNF1_WRAP(asinh) ZVNF1_WRAP(atan) ZVNF2_WRAP(atan2) +ZVNF1_WRAP(cosh) ZVNF1_WRAP(erf) ZVNF1_WRAP(erfc) ZVNF1_WRAP(expm1) diff --git a/pl/math/v_coshf_2u4.c b/pl/math/v_coshf_2u4.c new file mode 100644 index 0000000..7d7a228 --- /dev/null +++ b/pl/math/v_coshf_2u4.c @@ -0,0 +1,62 @@ +/* + * Single-precision vector cosh(x) function. + * Copyright (c) 2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "mathlib.h" + +#define AbsMask 0x7fffffff +#define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */ +#define SpecialBound \ + 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \ + special case. */ +#define Half v_f32 (0.5) + +#if V_SUPPORTED + +v_f32_t V_NAME (expf) (v_f32_t); + +/* Single-precision vector cosh, using vector expf. + Maximum error is 2.38 ULP: + __v_coshf(0x1.e8001ep+1) got 0x1.6a491ep+4 want 0x1.6a4922p+4. */ +VPCS_ATTR v_f32_t V_NAME (coshf) (v_f32_t x) +{ + v_u32_t ix = v_as_u32_f32 (x); + v_u32_t iax = ix & AbsMask; + v_f32_t ax = v_as_f32_u32 (iax); + v_u32_t special = v_cond_u32 (iax >= SpecialBound); + +#if WANT_ERRNO + /* If errno is to be set correctly, fall back to the scalar variant for all + inputs if any input is a special value or above the bound at which expf + overflows. */ + if (unlikely (v_any_u32 (special))) + return v_call_f32 (coshf, x, x, v_u32 (-1)); + + v_u32_t tiny = v_cond_u32 (iax <= TinyBound); + /* If any input is tiny, avoid underflow exception by fixing tiny lanes of + input to 1, which will generate no exceptions, and then also fixing tiny + lanes of output to 1 just before return. */ + if (unlikely (v_any_u32 (tiny))) + ax = v_sel_f32 (tiny, v_f32 (1), ax); +#endif + + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */ + v_f32_t t = V_NAME (expf) (ax); + v_f32_t y = t * Half + Half / t; + +#if WANT_ERRNO + if (unlikely (v_any_u32 (tiny))) + return v_sel_f32 (tiny, v_f32 (1), y); +#else + if (unlikely (v_any_u32 (special))) + return v_call_f32 (coshf, x, y, special); +#endif + + return y; +} +VPCS_ALIAS + +#endif diff --git a/pl/math/vn_coshf_2u4.c b/pl/math/vn_coshf_2u4.c new file mode 100644 index 0000000..6bc4635 --- /dev/null +++ b/pl/math/vn_coshf_2u4.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_coshf. + * + * Copyright (c) 2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "include/mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_coshf, _ZGVnN4v_coshf) +#include "v_coshf_2u4.c" +#endif |