Diffstat (limited to 'src/fms.c')
-rw-r--r--   src/fms.c   304
1 file changed, 304 insertions, 0 deletions
diff --git a/src/fms.c b/src/fms.c
new file mode 100644
index 0000000..2c0fd53
--- /dev/null
+++ b/src/fms.c
@@ -0,0 +1,304 @@
+/* mpfr_fms -- Floating multiply-subtract
+
+Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+Contributed by the AriC and Caramel projects, INRIA.
+
+This file is part of the GNU MPFR Library.
+
+The GNU MPFR Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MPFR Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
+http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include "mpfr-impl.h"
+
+/* The fused-multiply-subtract (fms) of x, y and z is defined by:
+ fms(x,y,z)= x*y - z
+ Note: this is neither in IEEE754R, nor in LIA-2, but both the
+ PowerPC and the Itanium define fms as x*y - z.
+*/
+
+int
+mpfr_fms (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
+ mpfr_rnd_t rnd_mode)
+{
+ int inexact;
+ mpfr_t u;
+ MPFR_SAVE_EXPO_DECL (expo);
+ MPFR_GROUP_DECL(group);
+
+ MPFR_LOG_FUNC
+ (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
+ mpfr_get_prec (x), mpfr_log_prec, x,
+ mpfr_get_prec (y), mpfr_log_prec, y,
+ mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
+ ("s[%Pu]=%.*Rg inexact=%d",
+ mpfr_get_prec (s), mpfr_log_prec, s, inexact));
+
+ /* particular cases */
+ if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
+ MPFR_IS_SINGULAR(y) ||
+ MPFR_IS_SINGULAR(z) ))
+ {
+ if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
+ {
+ MPFR_SET_NAN(s);
+ MPFR_RET_NAN;
+ }
+ /* now none of x, y and z is NaN */
+ else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
+ {
+ /* cases Inf*0-z, 0*Inf-z, Inf-Inf */
+ if ((MPFR_IS_ZERO(y)) ||
+ (MPFR_IS_ZERO(x)) ||
+ (MPFR_IS_INF(z) &&
+ ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) == MPFR_SIGN(z))))
+ {
+ MPFR_SET_NAN(s);
+ MPFR_RET_NAN;
+ }
+ else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
+ {
+ MPFR_SET_INF(s);
+ MPFR_SET_OPPOSITE_SIGN(s, z);
+ MPFR_RET(0);
+ }
+ else /* z is finite */
+ {
+ MPFR_SET_INF(s);
+ MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
+ MPFR_RET(0);
+ }
+ }
+ /* now x and y are finite */
+ else if (MPFR_IS_INF(z))
+ {
+ MPFR_SET_INF(s);
+ MPFR_SET_OPPOSITE_SIGN(s, z);
+ MPFR_RET(0);
+ }
+ else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
+ {
+ if (MPFR_IS_ZERO(z))
+ {
+ int sign_p;
+ sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
+ MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
+ ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_POS(z))
+ ? -1 : 1) :
+ ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_NEG(z))
+ ? 1 : -1)));
+ MPFR_SET_ZERO(s);
+ MPFR_RET(0);
+ }
+ else
+ return mpfr_neg (s, z, rnd_mode);
+ }
+ else /* necessarily z is zero here */
+ {
+ MPFR_ASSERTD(MPFR_IS_ZERO(z));
+ return mpfr_mul (s, x, y, rnd_mode);
+ }
+ }
+
+ /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
+ is exact, except in case of overflow or underflow. */
+ MPFR_SAVE_EXPO_MARK (expo);
+ MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
+
+ if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
+ {
+ /* overflow or underflow - this case is regarded as rare, thus
+ does not need to be very efficient (even if some tests below
+ could have been done earlier).
+ It is an overflow iff u is an infinity (since MPFR_RNDN was used).
+ Alternatively, we could test the overflow flag, but in this case,
+ mpfr_clear_flags would have been necessary. */
+ if (MPFR_IS_INF (u)) /* overflow */
+ {
+ /* Let's eliminate the obvious case where x*y and z have
+ opposite signs. No possible cancellation -> real overflow.
+ Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
+ then |x*y| >= 2^(emax+1), and |x*y - z| >= 2^emax. This case
+ is also an overflow. */
+ if (MPFR_SIGN (u) != MPFR_SIGN (z) ||
+ MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
+ {
+ MPFR_GROUP_CLEAR (group);
+ MPFR_SAVE_EXPO_FREE (expo);
+ return mpfr_overflow (s, rnd_mode, - MPFR_SIGN (z));
+ }
+
+ /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
+ (x/4)*y does not overflow (let's recall that the result
+ is exact with an unbounded exponent range). It does not
+ underflow either, because x*y overflows and the exponent
+ range is large enough. */
+ inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
+ MPFR_ASSERTN (inexact == 0);
+ inexact = mpfr_mul (u, u, y, MPFR_RNDN);
+ MPFR_ASSERTN (inexact == 0);
+
+ /* Now, we need to subtract z/4... But it may underflow! */
+ {
+ mpfr_t zo4;
+ mpfr_srcptr zz;
+ MPFR_BLOCK_DECL (flags);
+
+ if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
+ MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
+ {
+ /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
+ zz = z;
+ }
+ else
+ {
+ mpfr_init2 (zo4, MPFR_PREC (z));
+ if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
+ {
+ /* The division by 4 underflowed! */
+ MPFR_ASSERTN (0); /* TODO... */
+ }
+ zz = zo4;
+ }
+
+ /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
+ following subtraction would give the same result). */
+ MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, zz, rnd_mode));
+ /* u and zz have the same sign, so that an overflow
+ is not possible. But an underflow is theoretically
+ possible! */
+ if (MPFR_UNDERFLOW (flags))
+ {
+ MPFR_ASSERTN (zz != z);
+ MPFR_ASSERTN (0); /* TODO... */
+ mpfr_clears (zo4, u, (mpfr_ptr) 0);
+ }
+ else
+ {
+ int inex2;
+
+ if (zz != z)
+ mpfr_clear (zo4);
+ MPFR_GROUP_CLEAR (group);
+ MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
+ inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
+ if (inex2) /* overflow */
+ {
+ inexact = inex2;
+ MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
+ }
+ goto end;
+ }
+ }
+ }
+ else /* underflow: one has |xy| < 2^(emin-1). */
+ {
+ unsigned long scale = 0;
+ mpfr_t scaled_z;
+ mpfr_srcptr new_z;
+ mpfr_exp_t diffexp;
+ mpfr_prec_t pzs;
+ int xy_underflows;
+
+ /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
+ (the + 1 on MPFR_PREC (s) is necessary because the exponent
+ of the result can be EXP(z) - 1). */
+ diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
+ pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
+ if (diffexp <= pzs)
+ {
+ mpfr_uexp_t uscale;
+ mpfr_t scaled_v;
+ MPFR_BLOCK_DECL (flags);
+
+ uscale = (mpfr_uexp_t) pzs - diffexp + 1;
+ MPFR_ASSERTN (uscale > 0);
+ MPFR_ASSERTN (uscale <= ULONG_MAX);
+ scale = uscale;
+ mpfr_init2 (scaled_z, MPFR_PREC (z));
+ inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
+ MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
+ new_z = scaled_z;
+ /* Now we need to recompute u = xy * 2^scale. */
+ MPFR_BLOCK (flags,
+ if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
+ {
+ mpfr_init2 (scaled_v, MPFR_PREC (x));
+ mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
+ mpfr_mul (u, scaled_v, y, MPFR_RNDN);
+ }
+ else
+ {
+ mpfr_init2 (scaled_v, MPFR_PREC (y));
+ mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
+ mpfr_mul (u, x, scaled_v, MPFR_RNDN);
+ });
+ mpfr_clear (scaled_v);
+ MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
+ xy_underflows = MPFR_UNDERFLOW (flags);
+ }
+ else
+ {
+ new_z = z;
+ xy_underflows = 1;
+ }
+
+ if (xy_underflows)
+ {
+ /* Let's replace xy by sign(xy) * 2^(emin-1). */
+ MPFR_PREC (u) = MPFR_PREC_MIN;
+ mpfr_setmin (u, __gmpfr_emin);
+ MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
+ MPFR_SIGN (y)));
+ }
+
+ {
+ MPFR_BLOCK_DECL (flags);
+
+ MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, new_z, rnd_mode));
+ MPFR_GROUP_CLEAR (group);
+ if (scale != 0)
+ {
+ int inex2;
+
+ mpfr_clear (scaled_z);
+ /* Here an overflow is theoretically possible, in which case
+ the result may be wrong, hence the assert. An underflow
+ is not possible, but let's check that anyway. */
+ MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
+ MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
+ inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
+ /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
+ Also, handle the double rounding case:
+ s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
+ if (inex2) /* underflow */
+ inexact = inex2;
+ }
+ }
+
+ /* FIXME/TODO: I'm not sure that the following is correct.
+ Check for possible spurious exceptions due to intermediate
+ computations. */
+ MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
+ goto end;
+ }
+ }
+
+ inexact = mpfr_sub (s, u, z, rnd_mode);
+ MPFR_GROUP_CLEAR (group);
+ MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
+ end:
+ MPFR_SAVE_EXPO_FREE (expo);
+ return mpfr_check_range (s, inexact, rnd_mode);
+}
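
For reference, a minimal caller sketch (not part of the patch above, purely illustrative) showing the fused behaviour that mpfr_fms provides: with x = 1 + 2^-40, y = 1 - 2^-40 and z = 1 at 53-bit precision, mpfr_fms returns the exact value x*y - z = -2^-80, whereas a separate mpfr_mul followed by mpfr_sub at the same precision rounds it away to zero. All functions used are standard MPFR API calls; the demo program itself is hypothetical.

#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t x, y, z, s, t;
  int inex;

  mpfr_inits2 (53, x, y, z, s, t, (mpfr_ptr) 0);

  /* x = 1 + 2^-40, y = 1 - 2^-40, z = 1; hence x*y - z = -2^-80 exactly. */
  mpfr_set_ui_2exp (x, 1, -40, MPFR_RNDN);  /* x = 2^-40 */
  mpfr_add_ui (x, x, 1, MPFR_RNDN);         /* x = 1 + 2^-40 */
  mpfr_ui_sub (y, 2, x, MPFR_RNDN);         /* y = 1 - 2^-40 */
  mpfr_set_ui (z, 1, MPFR_RNDN);

  /* Fused: the product x*y is formed exactly before the subtraction,
     so the cancellation against z keeps the low-order bits. */
  inex = mpfr_fms (s, x, y, z, MPFR_RNDN);

  /* Unfused, at the same 53-bit precision: x*y rounds to 1 and the
     subtraction then yields 0. */
  mpfr_mul (t, x, y, MPFR_RNDN);
  mpfr_sub (t, t, z, MPFR_RNDN);

  mpfr_printf ("fms:     %Ra  (ternary value %d)\n", s, inex);
  mpfr_printf ("mul+sub: %Ra\n", t);

  mpfr_clears (x, y, z, s, t, (mpfr_ptr) 0);
  return 0;
}

Compile with something like: cc demo.c -lmpfr -lgmp.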