aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Beare <bruce.j.beare@intel.com>2011-09-15 10:59:01 -0700
committerDavid 'Digit' Turner <digit@google.com>2011-09-29 17:35:33 +0200
commit6cda7b6249c05ebcaebeb86badf957ec0f04e4ad (patch)
tree14578aaa3b23795029c105c6e1af50e8a05b8891
parent50a83255d80f98b857c3f72dd2225d4bbc720ca3 (diff)
downloadbionic-6cda7b6249c05ebcaebeb86badf957ec0f04e4ad.tar.gz
Bionic: x86: Fix libm macro definitions
http://code.google.com/p/android/issues/detail?id=19276 GCC would remove inline asm due to lack of knowledge of FPU register changes. Change-Id: I9f9e8623fa6580843b7cd8178439ace8c2db2d51 Signed-off-by: Mark D Horn <mark.d.horn@intel.com> Signed-off-by: Bruce Beare <bruce.j.beare@intel.com> Author: Jingwei Zhang <jingwei.zhang@intel.com>
-rw-r--r--libm/i387/fenv.c285
-rw-r--r--libm/include/i387/fenv.h167
2 files changed, 246 insertions, 206 deletions
diff --git a/libm/i387/fenv.c b/libm/i387/fenv.c
index aabe270d5..89ddc55ed 100644
--- a/libm/i387/fenv.c
+++ b/libm/i387/fenv.c
@@ -31,16 +31,46 @@
#include "npx.h"
#include "fenv.h"
+/*
+ * As compared to the x87 control word, the SSE unit's control word
+ * has the rounding control bits offset by 3 and the exception mask
+ * bits offset by 7.
+ */
+#define _SSE_ROUND_SHIFT 3
+#define _SSE_EMASK_SHIFT 7
+
const fenv_t __fe_dfl_env = {
- __INITIAL_NPXCW__,
- 0x0000,
- 0x0000,
- 0x1f80,
- 0xffffffff,
+ __INITIAL_NPXCW__, /*__control*/
+ 0x0000, /*__mxcsr_hi*/
+ 0x0000, /*__status*/
+ 0x1f80, /*__mxcsr_lo*/
+ 0xffffffff, /*__tag*/
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
};
+#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
+#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
+#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \
+ : "st", "st(1)", "st(2)", "st(3)", "st(4)", \
+ "st(5)", "st(6)", "st(7)")
+#define __fnclex() __asm __volatile("fnclex")
+#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
+#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
+#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
+#define __fwait() __asm __volatile("fwait")
+#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
+#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
+
+/* After testing for SSE support once, we cache the result in __has_sse. */
+enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
+#ifdef __SSE__
+#define __HAS_SSE() 1
+#else
+#define __HAS_SSE() (__has_sse == __SSE_YES || \
+ (__has_sse == __SSE_UNK && __test_sse()))
+#endif
+
enum __sse_support __has_sse =
#ifdef __SSE__
__SSE_YES;
@@ -48,6 +78,7 @@ enum __sse_support __has_sse =
__SSE_UNK;
#endif
+#ifndef __SSE__
#define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
#define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
#define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \
@@ -82,23 +113,27 @@ __test_sse(void)
__has_sse = __SSE_NO;
return (0);
}
+#endif /* __SSE__ */
int
fesetexceptflag(const fexcept_t *flagp, int excepts)
{
fenv_t env;
- int mxcsr;
-
- __fnstenv(&env);
- env.__status &= ~excepts;
- env.__status |= *flagp & excepts;
- __fldenv(env);
+ __uint32_t mxcsr;
- if (__HAS_SSE()) {
- __stmxcsr(&mxcsr);
- mxcsr &= ~excepts;
- mxcsr |= *flagp & excepts;
- __ldmxcsr(mxcsr);
+ excepts &= FE_ALL_EXCEPT;
+ if (excepts) { /* Do nothing if excepts is 0 */
+ __fnstenv(&env);
+ env.__status &= ~excepts;
+ env.__status |= *flagp & excepts;
+ __fnclex();
+ __fldenv(env);
+ if (__HAS_SSE()) {
+ __stmxcsr(&mxcsr);
+ mxcsr &= ~excepts;
+ mxcsr |= *flagp & excepts;
+ __ldmxcsr(mxcsr);
+ }
}
return (0);
@@ -117,32 +152,38 @@ feraiseexcept(int excepts)
int
fegetenv(fenv_t *envp)
{
- int control, mxcsr;
+ __uint32_t mxcsr;
+ __fnstenv(envp);
/*
- * fnstenv masks all exceptions, so we need to save and
- * restore the control word to avoid this side effect.
+ * fnstenv masks all exceptions, so we need to restore
+ * the old control word to avoid this side effect.
*/
- __fnstcw(&control);
- __fnstenv(envp);
+ __fldcw(envp->__control);
if (__HAS_SSE()) {
__stmxcsr(&mxcsr);
- __set_mxcsr(*envp, mxcsr);
+ envp->__mxcsr_hi = mxcsr >> 16;
+ envp->__mxcsr_lo = mxcsr & 0xffff;
}
- __fldcw(control);
return (0);
}
int
feholdexcept(fenv_t *envp)
{
- int mxcsr;
+ __uint32_t mxcsr;
+ fenv_t env;
- __fnstenv(envp);
+ __fnstenv(&env);
+ *envp = env;
+ env.__status &= ~FE_ALL_EXCEPT;
+ env.__control |= FE_ALL_EXCEPT;
__fnclex();
+ __fldenv(env);
if (__HAS_SSE()) {
__stmxcsr(&mxcsr);
- __set_mxcsr(*envp, mxcsr);
+ envp->__mxcsr_hi = mxcsr >> 16;
+ envp->__mxcsr_lo = mxcsr & 0xffff;
mxcsr &= ~FE_ALL_EXCEPT;
mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
__ldmxcsr(mxcsr);
@@ -153,60 +194,198 @@ feholdexcept(fenv_t *envp)
int
feupdateenv(const fenv_t *envp)
{
- int mxcsr;
- short status;
+ __uint32_t mxcsr;
+ __uint16_t status;
__fnstsw(&status);
- if (__HAS_SSE())
+ if (__HAS_SSE()) {
__stmxcsr(&mxcsr);
- else
+ } else {
mxcsr = 0;
+ }
fesetenv(envp);
feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
return (0);
}
int
-__feenableexcept(int mask)
+feenableexcept(int mask)
{
- int mxcsr, control, omask;
+ __uint32_t mxcsr;
+ __uint16_t control, omask;
mask &= FE_ALL_EXCEPT;
__fnstcw(&control);
- if (__HAS_SSE())
+ if (__HAS_SSE()) {
__stmxcsr(&mxcsr);
- else
+ } else {
mxcsr = 0;
- omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
- control &= ~mask;
- __fldcw(control);
- if (__HAS_SSE()) {
- mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
- __ldmxcsr(mxcsr);
}
- return (~omask);
+ omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
+ if (mask) {
+ control &= ~mask;
+ __fldcw(control);
+ if (__HAS_SSE()) {
+ mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
+ __ldmxcsr(mxcsr);
+ }
+ }
+ return (omask);
}
int
-__fedisableexcept(int mask)
+fedisableexcept(int mask)
{
- int mxcsr, control, omask;
+ __uint32_t mxcsr;
+ __uint16_t control, omask;
mask &= FE_ALL_EXCEPT;
__fnstcw(&control);
- if (__HAS_SSE())
+ if (__HAS_SSE()) {
__stmxcsr(&mxcsr);
- else
+ } else {
mxcsr = 0;
- omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
- control |= mask;
- __fldcw(control);
+ }
+ omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
+ if (mask) {
+ control |= mask;
+ __fldcw(control);
+ if (__HAS_SSE()) {
+ mxcsr |= mask << _SSE_EMASK_SHIFT;
+ __ldmxcsr(mxcsr);
+ }
+ }
+ return (omask);
+}
+
+int
+feclearexcept(int excepts)
+{
+ fenv_t env;
+ __uint32_t mxcsr;
+
+ excepts &= FE_ALL_EXCEPT;
+ if (excepts) { /* Do nothing if excepts is 0 */
+ __fnstenv(&env);
+ env.__status &= ~excepts;
+ __fnclex();
+ __fldenv(env);
+ if (__HAS_SSE()) {
+ __stmxcsr(&mxcsr);
+ mxcsr &= ~excepts;
+ __ldmxcsr(mxcsr);
+ }
+ }
+ return (0);
+}
+
+int
+fegetexceptflag(fexcept_t *flagp, int excepts)
+{
+ __uint32_t mxcsr;
+ __uint16_t status;
+
+ excepts &= FE_ALL_EXCEPT;
+ __fnstsw(&status);
+ if (__HAS_SSE()) {
+ __stmxcsr(&mxcsr);
+ } else {
+ mxcsr = 0;
+ }
+ *flagp = (status | mxcsr) & excepts;
+ return (0);
+}
+
+int
+fetestexcept(int excepts)
+{
+ __uint32_t mxcsr;
+ __uint16_t status;
+
+ excepts &= FE_ALL_EXCEPT;
+ if (excepts) { /* Do nothing if excepts is 0 */
+ __fnstsw(&status);
+ if (__HAS_SSE()) {
+ __stmxcsr(&mxcsr);
+ } else {
+ mxcsr = 0;
+ }
+ return ((status | mxcsr) & excepts);
+ }
+ return (0);
+}
+
+int
+fegetround(void)
+{
+ __uint16_t control;
+
+ /*
+ * We assume that the x87 and the SSE unit agree on the
+ * rounding mode. Reading the control word on the x87 turns
+ * out to be about 5 times faster than reading it on the SSE
+ * unit on an Opteron 244.
+ */
+ __fnstcw(&control);
+ return (control & _ROUND_MASK);
+}
+
+int
+fesetround(int round)
+{
+ __uint32_t mxcsr;
+ __uint16_t control;
+
+ if (round & ~_ROUND_MASK) {
+ return (-1);
+ } else {
+ __fnstcw(&control);
+ control &= ~_ROUND_MASK;
+ control |= round;
+ __fldcw(control);
+ if (__HAS_SSE()) {
+ __stmxcsr(&mxcsr);
+ mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
+ mxcsr |= round << _SSE_ROUND_SHIFT;
+ __ldmxcsr(mxcsr);
+ }
+ return (0);
+ }
+}
+
+int
+fesetenv(const fenv_t *envp)
+{
+ fenv_t env = *envp;
+ __uint32_t mxcsr;
+
+ mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
+ env.__mxcsr_hi = 0xffff;
+ env.__mxcsr_lo = 0xffff;
+ /*
+ * XXX Using fldenvx() instead of fldenv() tells the compiler that this
+ * instruction clobbers the i387 register stack. This happens because
+ * we restore the tag word from the saved environment. Normally, this
+ * would happen anyway and we wouldn't care, because the ABI allows
+ * function calls to clobber the i387 regs. However, fesetenv() is
+ * inlined, so we need to be more careful.
+ */
+ __fldenvx(env);
if (__HAS_SSE()) {
- mxcsr |= mask << _SSE_EMASK_SHIFT;
__ldmxcsr(mxcsr);
}
- return (~omask);
+ return (0);
}
-__weak_reference(__feenableexcept, feenableexcept);
-__weak_reference(__fedisableexcept, fedisableexcept);
+int
+fegetexcept(void)
+{
+ __uint16_t control;
+
+ /*
+ * We assume that the masks for the x87 and the SSE unit are
+ * the same.
+ */
+ __fnstcw(&control);
+ return (~control & FE_ALL_EXCEPT);
+}
diff --git a/libm/include/i387/fenv.h b/libm/include/i387/fenv.h
index 4281f10eb..710494c00 100644
--- a/libm/include/i387/fenv.h
+++ b/libm/include/i387/fenv.h
@@ -45,13 +45,6 @@ typedef struct {
char __other[16];
} fenv_t;
-#define __get_mxcsr(env) (((env).__mxcsr_hi << 16) | \
- ((env).__mxcsr_lo))
-#define __set_mxcsr(env, x) do { \
- (env).__mxcsr_hi = (__uint32_t)(x) >> 16; \
- (env).__mxcsr_lo = (__uint16_t)(x); \
-} while (0)
-
typedef __uint16_t fexcept_t;
/* Exception flags */
@@ -72,167 +65,35 @@ typedef __uint16_t fexcept_t;
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
-/*
- * As compared to the x87 control word, the SSE unit's control word
- * has the rounding control bits offset by 3 and the exception mask
- * bits offset by 7.
- */
-#define _SSE_ROUND_SHIFT 3
-#define _SSE_EMASK_SHIFT 7
-
-/* After testing for SSE support once, we cache the result in __has_sse. */
-enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
-extern enum __sse_support __has_sse;
-int __test_sse(void);
-#ifdef __SSE__
-#define __HAS_SSE() 1
-#else
-#define __HAS_SSE() (__has_sse == __SSE_YES || \
- (__has_sse == __SSE_UNK && __test_sse()))
-#endif
-
__BEGIN_DECLS
/* Default floating-point environment */
extern const fenv_t __fe_dfl_env;
#define FE_DFL_ENV (&__fe_dfl_env)
-#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
-#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
-#define __fnclex() __asm __volatile("fnclex")
-#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
-#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
-#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=a" (*(__sw)))
-#define __fwait() __asm __volatile("fwait")
-#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
-#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
-
-static __inline int
-feclearexcept(int __excepts)
-{
- fenv_t __env;
- int __mxcsr;
-
- if (__excepts == FE_ALL_EXCEPT) {
- __fnclex();
- } else {
- __fnstenv(&__env);
- __env.__status &= ~__excepts;
- __fldenv(__env);
- }
- if (__HAS_SSE()) {
- __stmxcsr(&__mxcsr);
- __mxcsr &= ~__excepts;
- __ldmxcsr(__mxcsr);
- }
- return (0);
-}
-
-static __inline int
-fegetexceptflag(fexcept_t *__flagp, int __excepts)
-{
- int __mxcsr, __status;
-
- __fnstsw(&__status);
- if (__HAS_SSE())
- __stmxcsr(&__mxcsr);
- else
- __mxcsr = 0;
- *__flagp = (__mxcsr | __status) & __excepts;
- return (0);
-}
-
-int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
-int feraiseexcept(int __excepts);
-
-static __inline int
-fetestexcept(int __excepts)
-{
- int __mxcsr;
- short __status;
-
- __fnstsw(&__status);
- if (__HAS_SSE())
- __stmxcsr(&__mxcsr);
- else
- __mxcsr = 0;
- return ((__status | __mxcsr) & __excepts);
-}
-
-static __inline int
-fegetround(void)
-{
- int __control;
+/* C99 floating-point exception functions */
+int feclearexcept(int excepts);
+int fegetexceptflag(fexcept_t *flagp, int excepts);
+int fesetexceptflag(const fexcept_t *flagp, int excepts);
+/* feraiseexcept does not set the inexact flag on overflow/underflow */
+int feraiseexcept(int excepts);
+int fetestexcept(int excepts);
- /*
- * We assume that the x87 and the SSE unit agree on the
- * rounding mode. Reading the control word on the x87 turns
- * out to be about 5 times faster than reading it on the SSE
- * unit on an Opteron 244.
- */
- __fnstcw(&__control);
- return (__control & _ROUND_MASK);
-}
-
-static __inline int
-fesetround(int __round)
-{
- int __mxcsr, __control;
-
- if (__round & ~_ROUND_MASK)
- return (-1);
-
- __fnstcw(&__control);
- __control &= ~_ROUND_MASK;
- __control |= __round;
- __fldcw(__control);
-
- if (__HAS_SSE()) {
- __stmxcsr(&__mxcsr);
- __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
- __mxcsr |= __round << _SSE_ROUND_SHIFT;
- __ldmxcsr(__mxcsr);
- }
-
- return (0);
-}
+/* C99 rounding control functions */
+int fegetround(void);
+int fesetround(int round);
+/* C99 floating-point environment functions */
int fegetenv(fenv_t *__envp);
int feholdexcept(fenv_t *__envp);
-
-static __inline int
-fesetenv(const fenv_t *__envp)
-{
- fenv_t __env = *__envp;
- int __mxcsr;
-
- __mxcsr = __get_mxcsr(__env);
- __set_mxcsr(__env, 0xffffffff);
- __fldenv(__env);
- if (__HAS_SSE())
- __ldmxcsr(__mxcsr);
- return (0);
-}
-
+int fesetenv(const fenv_t *envp);
int feupdateenv(const fenv_t *__envp);
#if __BSD_VISIBLE
-
+/* Additional support functions to set/query floating point traps */
int feenableexcept(int __mask);
int fedisableexcept(int __mask);
-
-static __inline int
-fegetexcept(void)
-{
- int __control;
-
- /*
- * We assume that the masks for the x87 and the SSE unit are
- * the same.
- */
- __fnstcw(&__control);
- return (~__control & FE_ALL_EXCEPT);
-}
+int fegetexcept(void);
#endif /* __BSD_VISIBLE */