 app/tests/benchmarks.c                    |  26
 app/tests/float.c                         |  26
 arch/arm/arm-m/arch.c                     |  12
 arch/arm/arm-m/exceptions.c               |  19
 arch/arm/arm-m/include/arch/arch_thread.h |   8
 arch/arm/arm-m/include/arch/arm/cm.h      |   6
 arch/arm/arm-m/thread.c                   | 264
 arch/arm/rules.mk                         |  16
 arch/arm/toolchain.mk                     |   7
 external/lib/libm/e_sqrt.c                |   2
 platform/stellaris/rules.mk               |   6
 platform/stm32f7xx/init.c                 |   4
 platform/stm32f7xx/rules.mk               |   2
 13 files changed, 349 insertions(+), 49 deletions(-)
diff --git a/app/tests/benchmarks.c b/app/tests/benchmarks.c
index 300f0dd6..e5b4d9d4 100644
--- a/app/tests/benchmarks.c
+++ b/app/tests/benchmarks.c
@@ -39,6 +39,10 @@ const uint ITER = 1024;
__NO_INLINE static void bench_set_overhead(void)
{
uint32_t *buf = malloc(BUFSIZE);
+ if (!buf) {
+ printf("failed to allocate buffer\n");
+ return;
+ }
uint count = arch_cycle_count();
for (uint i = 0; i < ITER; i++) {
@@ -55,6 +59,10 @@ __NO_INLINE static void bench_set_overhead(void)
__NO_INLINE static void bench_memset(void)
{
void *buf = malloc(BUFSIZE);
+ if (!buf) {
+ printf("failed to allocate buffer\n");
+ return;
+ }
uint count = arch_cycle_count();
for (uint i = 0; i < ITER; i++) {
@@ -72,6 +80,10 @@ __NO_INLINE static void bench_memset(void)
__NO_INLINE static void bench_cset_##type(void) \
{ \
type *buf = malloc(BUFSIZE); \
+ if (!buf) { \
+ printf("failed to allocate buffer\n"); \
+ return; \
+ } \
\
uint count = arch_cycle_count(); \
for (uint i = 0; i < ITER; i++) { \
@@ -95,6 +107,10 @@ bench_cset(uint64_t)
__NO_INLINE static void bench_cset_wide(void)
{
uint32_t *buf = malloc(BUFSIZE);
+ if (!buf) {
+ printf("failed to allocate buffer\n");
+ return;
+ }
uint count = arch_cycle_count();
for (uint i = 0; i < ITER; i++) {
@@ -119,7 +135,11 @@ __NO_INLINE static void bench_cset_wide(void)
__NO_INLINE static void bench_memcpy(void)
{
- uint8_t *buf = calloc(1, BUFSIZE);
+ uint8_t *buf = malloc(BUFSIZE);
+ if (!buf) {
+ printf("failed to allocate buffer\n");
+ return;
+ }
uint count = arch_cycle_count();
for (uint i = 0; i < ITER; i++) {
@@ -137,6 +157,10 @@ __NO_INLINE static void bench_memcpy(void)
__NO_INLINE static void arm_bench_cset_stm(void)
{
uint32_t *buf = malloc(BUFSIZE);
+ if (!buf) {
+ printf("failed to allocate buffer\n");
+ return;
+ }
uint count = arch_cycle_count();
for (uint i = 0; i < ITER; i++) {
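
Every benchmark above follows the same shape: allocate a scratch buffer, bail out if the heap is exhausted, then bracket the measured loop with arch_cycle_count(). A minimal sketch of that shared skeleton (the loop body and printout are illustrative, not part of the patch):

    __NO_INLINE static void bench_template(void)
    {
        void *buf = malloc(BUFSIZE);
        if (!buf) {
            /* small heaps can fail; skip the benchmark instead of faulting */
            printf("failed to allocate buffer\n");
            return;
        }

        uint count = arch_cycle_count();
        for (uint i = 0; i < ITER; i++) {
            /* measured work over buf goes here */
        }
        count = arch_cycle_count() - count;

        printf("took %u cycles\n", count);
        free(buf);
    }

The switch from calloc() to malloc() in bench_memcpy matches the other benchmarks; zeroing the source buffer is not needed to time a copy.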
diff --git a/app/tests/float.c b/app/tests/float.c
index 5d3d8c61..11e39332 100755
--- a/app/tests/float.c
+++ b/app/tests/float.c
@@ -38,25 +38,31 @@ extern void float_vfp_thumb_instruction_test(void);
extern void float_neon_arm_instruction_test(void);
extern void float_neon_thumb_instruction_test(void);
+#if !ARM_WITH_VFP_SP_ONLY
+#define FLOAT double
+#else
+#define FLOAT float
+#endif
+
/* optimize this function to cause it to try to use a lot of registers */
__OPTIMIZE("O3")
static int float_thread(void *arg)
{
- double *val = arg;
+ FLOAT *val = arg;
uint i, j;
- double a[16];
+ FLOAT a[16];
/* do a bunch of work with floating point to test context switching */
a[0] = *val;
for (i = 1; i < countof(a); i++) {
- a[i] = a[i-1] * 1.01;
+ a[i] = a[i-1] * 1.01f;
}
for (i = 0; i < 1000000; i++) {
- a[0] += i;
+ a[0] += 0.001f;
for (j = 1; j < countof(a); j++) {
- a[j] += a[j-1] * 0.00001;
+ a[j] += a[j-1] * 0.00001f;
}
}
@@ -65,7 +71,7 @@ static int float_thread(void *arg)
return 1;
}
-#if ARCH_ARM
+#if ARCH_ARM && !ARM_ISA_ARMV7M
static void arm_float_instruction_trap_test(void)
{
printf("testing fpu trap\n");
@@ -87,12 +93,14 @@ static void float_tests(void)
/* test lazy fpu load on separate thread */
thread_t *t[8];
- double val[countof(t)];
+ FLOAT val[countof(t)];
printf("creating %u floating point threads\n", countof(t));
for (uint i = 0; i < countof(t); i++) {
val[i] = i;
- t[i] = thread_create("float", &float_thread, &val[i], LOW_PRIORITY, DEFAULT_STACK_SIZE);
+ char name[32];
+ snprintf(name, sizeof(name), "float %u", i);
+ t[i] = thread_create(name, &float_thread, &val[i], LOW_PRIORITY, DEFAULT_STACK_SIZE);
thread_resume(t[i]);
}
@@ -103,7 +111,7 @@ static void float_tests(void)
}
printf("the above values should be close\n");
-#if ARCH_ARM
+#if ARCH_ARM && !ARM_ISA_ARMV7M
/* test all the instruction traps */
arm_float_instruction_trap_test();
#endif
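
On an SP-only VFP (ARM_WITH_VFP_SP_ONLY) every double operation falls back to the soft-float library, so the test computes in FLOAT and the constants gain f suffixes to keep the arithmetic in single precision; without the suffix the usual arithmetic conversions promote the whole expression to double. A sketch of the difference, assuming an fpv4-sp-d16 target:

    float a = 1.0f;
    a = a * 1.01;    /* 1.01 is a double: promoted multiply via soft-float calls */
    a = a * 1.01f;   /* stays float: a single vmul.f32 on the FPU */

Replacing a[0] += i with a[0] += 0.001f also keeps the accumulator from growing with the loop index, which likely helps the eight threads converge on the comparable values the test prints.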
diff --git a/arch/arm/arm-m/arch.c b/arch/arm/arm-m/arch.c
index 76c72022..b97d3792 100644
--- a/arch/arm/arm-m/arch.c
+++ b/arch/arm/arm-m/arch.c
@@ -42,7 +42,7 @@ void arch_early_init(void)
arch_disable_ints();
-#if (__CORTEX_M >= 0x03) || (CORTEX_SC >= 300)
+#if (__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)
uint i;
/* set the vector table base */
SCB->VTOR = (uint32_t)&vectab;
@@ -89,6 +89,11 @@ void arch_early_init(void)
NVIC_SetPriority(DebugMonitor_IRQn, arm_cm_medium_priority());
#endif
+ /* FPU settings ------------------------------------------------------------*/
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2)); /* set CP10 and CP11 Full Access */
+#endif
+
#if ARM_WITH_CACHE
arch_enable_cache(UCACHE);
#endif
@@ -101,6 +106,11 @@ void arch_init(void)
*REG32(DWT_CYCCNT) = 0;
*REG32(DWT_CTRL) |= 1; // enable cycle counter
#endif
+ printf("CONTROL 0x%x\n", __get_CONTROL());
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ printf("FPSCR 0x%x\n", __get_FPSCR());
+ printf("FPCCR 0x%x\n", FPU->FPCCR);
+#endif
}
void arch_quiesce(void)
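
CPACR grants access to coprocessors in two-bit fields; CP10 and CP11 together control the FPU, and 0b11 in each field means full (privileged and unprivileged) access. Moving this write into arch_early_init(), and out of the platform's SystemInit() below, guarantees the FPU is usable before any kernel code executes a floating point instruction. A hedged sketch of the same enable, with the barriers ARM recommends after a CPACR write (the barriers are our addition, not in the patch):

    SCB->CPACR |= (3UL << (10 * 2)) |   /* CP10: full access */
                  (3UL << (11 * 2));    /* CP11: full access */
    __DSB();
    __ISB();   /* flush the pipeline before the first FP instruction */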
diff --git a/arch/arm/arm-m/exceptions.c b/arch/arm/arm-m/exceptions.c
index 8112cd6c..e0b7f9ed 100644
--- a/arch/arm/arm-m/exceptions.c
+++ b/arch/arm/arm-m/exceptions.c
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <compiler.h>
#include <stdint.h>
+#include <bits.h>
#include <kernel/thread.h>
#include <arch/arm/cm.h>
#include <platform.h>
@@ -90,6 +91,24 @@ static void usagefault(struct arm_cm_exception_frame *frame)
printf("usagefault: ");
dump_frame(frame);
+#if (__CORTEX_M >= 0x03)
+ uint32_t ufsr = BITS_SHIFT(SCB->CFSR, 31, 16);
+ printf("UFSR 0x%x: ", ufsr);
+
+ if (ufsr & (1<<0))
+ printf("undefined instruction\n");
+ if (ufsr & (1<<1))
+ printf("ESPR invalid\n");
+ if (ufsr & (1<<2))
+ printf("integrity check failed on EXC_RETURN\n");
+ if (ufsr & (1<<3))
+ printf("coprocessor access error\n");
+ if (ufsr & (1<<8))
+ printf("unaligned error\n");
+ if (ufsr & (1<<9))
+ printf("division by zero\n");
+#endif
+
platform_halt(HALT_ACTION_HALT, HALT_REASON_SW_PANIC);
}
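
SCB->CFSR packs three fault status registers: MemManage (bits 7:0), BusFault (15:8), and UsageFault (31:16), which is why the handler extracts the top half-word. BITS_SHIFT from bits.h pulls out an inclusive bit range; its shape is roughly this (a sketch, the real macro may differ in detail):

    /* extract bits [high:low] of x, shifted down to bit 0 */
    #define BITS_SHIFT(x, high, low) \
        (((x) >> (low)) & ((1UL << ((high) - (low) + 1)) - 1))

The decoded bits match the ARMv7-M UFSR layout: UNDEFINSTR (0), INVSTATE (1), INVPC (2), NOCP (3), UNALIGNED (8), DIVBYZERO (9). NOCP is the interesting one for this patch: it fires when code touches the FPU while CPACR access is disabled.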
diff --git a/arch/arm/arm-m/include/arch/arch_thread.h b/arch/arm/arm-m/include/arch/arch_thread.h
index 4e2b8396..4f6d67bb 100644
--- a/arch/arm/arm-m/include/arch/arch_thread.h
+++ b/arch/arm/arm-m/include/arch/arch_thread.h
@@ -29,6 +29,14 @@
struct arch_thread {
vaddr_t sp;
bool was_preempted;
+
+#if ARM_WITH_VFP
+ /* has this thread ever used the floating point state? */
+ bool fpused;
+
+ /* s16-s31 saved here. s0-s15, fpscr saved on exception frame */
+ float fpregs[16];
+#endif
};
#endif
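
The split in the comment mirrors the ARMv7-M lazy stacking model: the caller-saved half of the register file (s0-s15 plus FPSCR) is stacked by hardware in the exception frame, so the kernel only needs per-thread storage for the callee-saved s16-s31. The added per-thread cost is modest; roughly, under the stated layout:

    /* sketch: 16 single-precision registers = 64 bytes, plus the
       fpused flag and padding -- about 68 bytes per thread */
    _Static_assert(sizeof(((struct arch_thread *)0)->fpregs) == 64,
                   "s16-s31 storage");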
diff --git a/arch/arm/arm-m/include/arch/arm/cm.h b/arch/arm/arm-m/include/arch/arm/cm.h
index d3db3ace..13a62796 100644
--- a/arch/arm/arm-m/include/arch/arm/cm.h
+++ b/arch/arm/arm-m/include/arch/arm/cm.h
@@ -100,6 +100,12 @@ struct arm_cm_exception_frame_long {
uint32_t psr;
};
+/* when fpu context save is enabled, this goes just above psr in the previous structs */
+struct arm_cm_exception_frame_fpu {
+ float s[16];
+ uint32_t fpscr;
+};
+
#if ARM_CM_DYNAMIC_PRIORITY_SIZE
extern unsigned int arm_cm_num_irq_pri_bits;
extern unsigned int arm_cm_irq_pri_mask;
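
When CONTROL.FPCA is set at exception entry, the hardware pushes an extended frame: the basic r0-r3/r12/lr/pc/psr frame, then s0-s15, FPSCR, and one reserved word that keeps the frame 8-byte aligned (26 words in total). arm_cm_exception_frame_fpu describes just the FP portion that sits above psr. For reference, a sketch of the full hardware layout (our naming, not a struct the patch defines):

    struct arm_cm_extended_hw_frame {   /* hypothetical, for illustration */
        uint32_t r0, r1, r2, r3, r12, lr, pc, psr;
        float    s[16];                 /* s0-s15 */
        uint32_t fpscr;
        uint32_t reserved;              /* aligner pushed by hardware */
    };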
diff --git a/arch/arm/arm-m/thread.c b/arch/arm/arm-m/thread.c
index 658cc5f7..fbca962c 100644
--- a/arch/arm/arm-m/thread.c
+++ b/arch/arm/arm-m/thread.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Travis Geiselbrecht
+ * Copyright (c) 2012-2015 Travis Geiselbrecht
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
@@ -65,8 +65,8 @@ struct arm_cm_context_switch_frame {
#define SAVE_REGS "push { r4-r11, lr };"
#define RESTORE_REGS "pop { r4-r11, lr };"
#define RESTORE_REGS_PC "pop { r4-r11, pc };"
-#define SAVE_SP(basereg, tempreg) \
- "str sp, [" #basereg "];"
+#define SAVE_SP(basereg, tempreg, offset) "str sp, [" #basereg "," #offset "];"
+#define LOAD_SP(basereg, tempreg, offset) "ldr sp, [" #basereg "," #offset "];"
#define CLREX "clrex;"
#else
@@ -95,9 +95,12 @@ struct arm_cm_context_switch_frame {
"mov r10, r6;" \
"mov r11, r7;" \
"pop { r4-r7, pc };"
-#define SAVE_SP(basereg, tempreg) \
+#define SAVE_SP(basereg, tempreg, offset) \
"mov " #tempreg ", sp;" \
- "str " #tempreg ", [" #basereg "];"
+ "str " #tempreg ", [" #basereg "," #offset "];"
+#define LOAD_SP(basereg, tempreg, offset) \
+ "ldr " #tempreg ", [" #basereg "," #offset "];" \
+ "mov sp, " #tempreg ";"
/* there is no clrex on armv6m devices */
#define CLREX ""
@@ -143,6 +146,12 @@ void arch_thread_initialize(struct thread *t)
t->arch.sp = (addr_t)frame;
t->arch.was_preempted = false;
+
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* zero the fpu register state */
+ memset(t->arch.fpregs, 0, sizeof(t->arch.fpregs));
+ t->arch.fpused = false;
+#endif
}
static volatile struct arm_cm_exception_frame_long *preempt_frame;
@@ -193,47 +202,172 @@ __NAKED void _svc(void)
);
}
-__NAKED static void _half_save_and_svc(vaddr_t *fromsp, vaddr_t tosp)
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+__NAKED static void _half_save_and_svc(struct thread *oldthread, struct thread *newthread, bool fpu_save, bool restore_fpu)
+#else
+__NAKED static void _half_save_and_svc(struct thread *oldthread, struct thread *newthread)
+#endif
{
__asm__ volatile(
- SAVE_REGS
- SAVE_SP(r0, r2)
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we need to save fpu context */
+ "tst r2, #1;"
+ "beq 0f;"
- /* make sure we load the destination sp here before we reenable interrupts */
- "mov sp, r1;"
+ /* save part of the fpu context on the stack */
+ "vmrs r2, fpscr;"
+ "push { r2 };"
+ "vpush { s0-s15 };"
- /* clear the load/store exclusive state */
- CLREX
+ /* save the top regs into the thread struct */
+ "add r2, r0, %[fp_off];"
+ "vstm r2, { s16-s31 };"
- /* reenable interrupts */
+ "0:"
+#endif
+
+ /* save regular context */
+ SAVE_REGS
+ SAVE_SP(r0, r2, %[sp_off])
+
+ /* restore the new thread's stack pointer, but not the integer state (yet) */
+ LOAD_SP(r1, r2, %[sp_off])
+
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we need to restore fpu context */
+ "tst r3, #1;"
+ "beq 0f;"
+
+ /* restore the top part of the fpu context */
+ "add r3, r1, %[fp_off];"
+ "vldm r3, { s16-s31 };"
+
+ /* restore the bottom part of the context, stored up the frame a little bit */
+ "add r3, sp, %[fp_exc_off];"
+ "vldm r3!, { s0-s15 };"
+ "ldr r3, [r3];"
+ "vmsr fpscr, r3;"
+ "b 1f;"
+
+ /* disable fpu context if we're not restoring anything */
+ "0:"
+ "mrs r3, CONTROL;"
+ "bic r3, #(1<<2);" /* unset FPCA */
+ "msr CONTROL, r3;"
+ "isb;"
+
+ "1:"
+#endif
+
+ CLREX
"cpsie i;"
/* make a svc call to get us into handler mode.
* use r4 as an arg, since r0 is saved on the stack for the svc */
- "mov r4, r1;"
+ "mov r4, sp;"
"svc #0;"
+ :: [sp_off] "i"(offsetof(thread_t, arch.sp))
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ ,[fp_off] "i"(offsetof(thread_t, arch.fpregs))
+ ,[fp_exc_off] "i"(sizeof(struct arm_cm_exception_frame_long))
+#endif
);
}
/* simple scenario where the to and from thread yielded */
-__NAKED static void _arch_non_preempt_context_switch(vaddr_t *fromsp, vaddr_t tosp)
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+__NAKED static void _arch_non_preempt_context_switch(struct thread *oldthread, struct thread *newthread, bool save_fpu, bool restore_fpu)
+#else
+__NAKED static void _arch_non_preempt_context_switch(struct thread *oldthread, struct thread *newthread)
+#endif
{
__asm__ volatile(
- SAVE_REGS
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we need to save fpu context */
+ "tst r2, #1;"
+ "beq 0f;"
- SAVE_SP(r0, r2)
+ /* save part of the fpu context on the stack */
+ "vmrs r2, fpscr;"
+ "push { r2 };"
+ "vpush { s0-s15 };"
+
+ /* save the top regs into the thread struct */
+ "add r2, r0, %[fp_off];"
+ "vstm r2, { s16-s31 };"
+
+ "0:"
+#endif
- "mov sp, r1;"
+ /* save regular context */
+ SAVE_REGS
+ SAVE_SP(r0, r2, %[sp_off])
+
+ /* restore new context */
+ LOAD_SP(r1, r2, %[sp_off])
+ RESTORE_REGS
+
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we need to restore fpu context */
+ "tst r3, #1;"
+ "beq 0f;"
+
+ /* restore fpu context */
+ "add r3, r1, %[fp_off];"
+ "vldm r3, { s16-s31 };"
+
+ "vpop { s0-s15 };"
+ "pop { r3 };"
+ "vmsr fpscr, r3;"
+ "b 1f;"
+
+ /* disable fpu context if we're not restoring anything */
+ "0:"
+ "mrs r3, CONTROL;"
+ "bic r3, #(1<<2);" /* unset FPCA */
+ "msr CONTROL, r3;"
+ "isb;"
+
+ "1:"
+#endif
CLREX
- RESTORE_REGS_PC
+ "bx lr;"
+ :: [sp_off] "i"(offsetof(thread_t, arch.sp))
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ , [fp_off] "i"(offsetof(thread_t, arch.fpregs))
+#endif
);
}
-__NAKED static void _thread_mode_bounce(void)
+__NAKED static void _thread_mode_bounce(bool fpused)
{
__asm__ volatile(
- RESTORE_REGS_PC
+ /* restore main context */
+ RESTORE_REGS
+
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we need to restore fpu context */
+ "tst r0, #1;"
+ "beq 0f;"
+
+ /* restore fpu context */
+ "vpop { s0-s15 };"
+ "pop { r0 };"
+ "vmsr fpscr, r0;"
+ "b 1f;"
+
+ /* disable fpu context if we're not restoring anything */
+ "0:"
+ "mrs r3, CONTROL;"
+ "bic r3, #(1<<2);" /* unset FPCA */
+ "msr CONTROL, r3;"
+ "isb;"
+
+ "1:"
+#endif
+
+ "bx lr;"
);
__UNREACHABLE;
}
@@ -247,17 +381,62 @@ __NAKED static void _thread_mode_bounce(void)
*/
void arch_context_switch(struct thread *oldthread, struct thread *newthread)
{
- LTRACE_ENTRY;
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ LTRACEF("FPCCR.LSPACT %lu, FPCAR 0x%x, CONTROL.FPCA %lu\n",
+ FPU->FPCCR & FPU_FPCCR_LSPACT_Msk, FPU->FPCAR, __get_CONTROL() & CONTROL_FPCA_Msk);
+#endif
/* if preempt_frame is set, we are being preempted */
if (preempt_frame) {
+ LTRACEF("we're preempted, old frame %p, old lr 0x%x, pc 0x%x, new preempted bool %d\n",
+ preempt_frame, preempt_frame->lr, preempt_frame->pc, newthread->arch.was_preempted);
+
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if extended fpu frame was pushed */
+ if ((preempt_frame->lr & (1<<4)) == 0) {
+ LTRACEF("thread %s pushed fpu frame\n", oldthread->name);
+
+ /* save the top part of the context */
+ /* note this should also trigger a lazy fpu save if it hasn't already done so */
+ asm volatile("vstm %0, { s16-s31 }" :: "r" (&oldthread->arch.fpregs[0]));
+ oldthread->arch.fpused = true;
+
+ /* verify that FPCCR.LSPACT was cleared and CONTROL.FPCA was set */
+ DEBUG_ASSERT((FPU->FPCCR & FPU_FPCCR_LSPACT_Msk) == 0);
+ DEBUG_ASSERT(__get_CONTROL() & CONTROL_FPCA_Msk);
+ } else {
+ DEBUG_ASSERT(oldthread->arch.fpused == false);
+ }
+#endif
+
oldthread->arch.was_preempted = true;
oldthread->arch.sp = (addr_t)preempt_frame;
preempt_frame = NULL;
- LTRACEF("we're preempted, new %d\n", newthread->arch.was_preempted);
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* if new thread has saved fpu state, restore it */
+ if (newthread->arch.fpused) {
+ LTRACEF("newthread FPCCR.LSPACT %lu, FPCAR 0x%x, CONTROL.FPCA %lu\n",
+ FPU->FPCCR & FPU_FPCCR_LSPACT_Msk, FPU->FPCAR, __get_CONTROL() & CONTROL_FPCA_Msk);
+
+ /* enable the fpu manually */
+ __set_CONTROL(__get_CONTROL() | CONTROL_FPCA_Msk);
+ asm volatile("isb");
+
+ DEBUG_ASSERT((FPU->FPCCR & FPU_FPCCR_LSPACT_Msk) == 0);
+ DEBUG_ASSERT(__get_CONTROL() & CONTROL_FPCA_Msk);
+
+ /* restore the top of the fpu state, the rest will happen below */
+ asm volatile("vldm %0, { s16-s31 }" :: "r" (&newthread->arch.fpregs[0]));
+ }
+#endif
+
if (newthread->arch.was_preempted) {
/* return directly to the preempted thread's iframe */
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ LTRACEF("newthread2 FPCCR.LSPACT %lu, FPCAR 0x%x, CONTROL.FPCA %lu\n",
+ FPU->FPCCR & FPU_FPCCR_LSPACT_Msk, FPU->FPCAR, __get_CONTROL() & CONTROL_FPCA_Msk);
+#endif
__asm__ volatile(
"mov sp, %0;"
"cpsie i;"
@@ -274,10 +453,17 @@ void arch_context_switch(struct thread *oldthread, struct thread *newthread)
frame->pc = (uint32_t)&_thread_mode_bounce;
frame->psr = (1 << 24); /* thread bit set, IPSR 0 */
- frame->r0 = frame->r1 = frame->r2 = frame->r3 = frame->r12 = frame->lr = 99;
+ frame->r0 = frame->r1 = frame->r2 = frame->r3 = frame->r12 = frame->lr = 0;
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* pass the fpused bool to _thread_mode_bounce */
+ frame->r0 = newthread->arch.fpused;
+#endif
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ LTRACEF("iretting to user space, fpused %u\n", newthread->arch.fpused);
+#else
LTRACEF("iretting to user space\n");
- //hexdump(frame, sizeof(*frame) + 64);
+#endif
__asm__ volatile(
CLREX
@@ -290,12 +476,30 @@ void arch_context_switch(struct thread *oldthread, struct thread *newthread)
} else {
oldthread->arch.was_preempted = false;
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ /* see if we have fpu state we need to save */
+ if (!oldthread->arch.fpused && __get_CONTROL() & CONTROL_FPCA_Msk) {
+ /* mark this thread as using float */
+ LTRACEF("thread %s uses float\n", oldthread->name);
+ oldthread->arch.fpused = true;
+ }
+#endif
+
if (newthread->arch.was_preempted) {
LTRACEF("not being preempted, but switching to preempted thread\n");
- _half_save_and_svc(&oldthread->arch.sp, newthread->arch.sp);
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ _half_save_and_svc(oldthread, newthread, oldthread->arch.fpused, newthread->arch.fpused);
+#else
+ _half_save_and_svc(oldthread, newthread);
+#endif
} else {
/* fast path, both sides did not preempt */
- _arch_non_preempt_context_switch(&oldthread->arch.sp, newthread->arch.sp);
+ LTRACEF("both sides are not preempted newsp 0x%lx\n", newthread->arch.sp);
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ _arch_non_preempt_context_switch(oldthread, newthread, oldthread->arch.fpused, newthread->arch.fpused);
+#else
+ _arch_non_preempt_context_switch(oldthread, newthread);
+#endif
}
}
@@ -305,7 +509,11 @@ void arch_dump_thread(thread_t *t)
{
if (t->state != THREAD_RUNNING) {
dprintf(INFO, "\tarch: ");
- dprintf(INFO, "sp 0x%lx, was preempted %u\n", t->arch.sp, t->arch.was_preempted);
+ dprintf(INFO, "sp 0x%lx, was preempted %u", t->arch.sp, t->arch.was_preempted);
+#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
+ dprintf(INFO, ", fpused %u", t->arch.fpused);
+#endif
+ dprintf(INFO, "\n");
}
}
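
Two ARMv7-M details carry most of the logic above. First, bit 4 of EXC_RETURN (the lr value captured in the preempt frame) is clear when the hardware stacked the extended FPU frame, which is how arch_context_switch detects that the outgoing thread owns FP state. A hypothetical helper capturing the test used above:

    static inline bool frame_has_fpu_state(uint32_t exc_return)
    {
        return (exc_return & (1u << 4)) == 0;   /* 0 => extended frame */
    }

Second, lazy stacking: at exception entry the hardware only reserves space for s0-s15/FPSCR and defers the actual stores until the first FP instruction executes inside the handler. The vstm of s16-s31 in the preempt path is such an instruction, so it also forces the deferred save, after which FPCCR.LSPACT must read 0 -- exactly what the DEBUG_ASSERTs check.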
diff --git a/arch/arm/rules.mk b/arch/arm/rules.mk
index 83bb10d1..713abb0f 100644
--- a/arch/arm/rules.mk
+++ b/arch/arm/rules.mk
@@ -62,7 +62,7 @@ GLOBAL_DEFINES += \
ARM_WITH_THUMB=1 \
ARM_WITH_THUMB2=1 \
ARM_WITH_VFP=1 \
- __FPU_PRESENT=1
+ ARM_WITH_VFP_SP_ONLY=1
HANDLED_CORE := true
ENABLE_THUMB := true
SUBARCH := arm-m
@@ -79,6 +79,20 @@ HANDLED_CORE := true
ENABLE_THUMB := true
SUBARCH := arm-m
endif
+ifeq ($(ARM_CPU),cortex-m7-fpu-sp-d16)
+GLOBAL_DEFINES += \
+ ARM_CPU_CORTEX_M7=1 \
+ ARM_ISA_ARMV7=1 \
+ ARM_ISA_ARMV7M=1 \
+ ARM_WITH_THUMB=1 \
+ ARM_WITH_THUMB2=1 \
+ ARM_WITH_CACHE=1 \
+ ARM_WITH_VFP=1 \
+ ARM_WITH_VFP_SP_ONLY=1
+HANDLED_CORE := true
+ENABLE_THUMB := true
+SUBARCH := arm-m
+endif
ifeq ($(ARM_CPU),cortex-a7)
GLOBAL_DEFINES += \
ARM_WITH_CP15=1 \
diff --git a/arch/arm/toolchain.mk b/arch/arm/toolchain.mk
index 10cf6b6e..65f62fda 100644
--- a/arch/arm/toolchain.mk
+++ b/arch/arm/toolchain.mk
@@ -65,10 +65,15 @@ ifeq ($(ARM_CPU),cortex-m4)
ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU)
endif
ifeq ($(ARM_CPU),cortex-m7)
+# use cortex-m4 for now until better general toolchain support
ARCH_arm_COMPILEFLAGS += -mcpu=cortex-m4
endif
+ifeq ($(ARM_CPU),cortex-m7-fpu-sp-d16)
+# use cortex-m4 for now until better general toolchain support
+ARCH_arm_COMPILEFLAGS += -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mfloat-abi=softfp
+endif
ifeq ($(ARM_CPU),cortex-m4f)
-ARCH_arm_COMPILEFLAGS += -mcpu=cortex-m4 -mfloat-abi=softfp
+ARCH_arm_COMPILEFLAGS += -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mfloat-abi=softfp
endif
ifeq ($(ARM_CPU),cortex-a7)
ARCH_arm_COMPILEFLAGS += -mcpu=$(ARM_CPU)
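
-mfloat-abi=softfp keeps the soft-float calling convention (FP arguments and results travel in core registers) while still emitting VFP instructions for the math itself, so soft-float code and hand-written assembly link cleanly against it. For example, a function like this compiles to a vadd.f32 internally but still takes and returns its value in r0 (a sketch, assuming fpv4-sp-d16):

    float add_one(float x)
    {
        return x + 1.0f;   /* vadd.f32 under softfp; x arrives in r0 */
    }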
diff --git a/external/lib/libm/e_sqrt.c b/external/lib/libm/e_sqrt.c
index 1abcb014..6d288284 100644
--- a/external/lib/libm/e_sqrt.c
+++ b/external/lib/libm/e_sqrt.c
@@ -89,7 +89,7 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
-#if defined(LK) && ARCH_ARM && ARM_WITH_VFP
+#if defined(LK) && ARCH_ARM && ARM_WITH_VFP && !ARM_WITH_VFP_SP_ONLY
/* use ARM w/VFP sqrt instruction */
double
__ieee754_sqrt(double x)
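
With an SP-only FPU there is no vsqrt.f64, so the VFP fast path for double sqrt must be compiled out and the generic C implementation used instead. The single-precision equivalent would still be available in hardware; a hedged sketch, not part of the patch:

    /* assumes VFPv4-SP; "t" is GCC's constraint for an s-register */
    static inline float sqrtf_vfp(float x)
    {
        float r;
        __asm__("vsqrt.f32 %0, %1" : "=t"(r) : "t"(x));
        return r;
    }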
diff --git a/platform/stellaris/rules.mk b/platform/stellaris/rules.mk
index b6aef260..b602342c 100644
--- a/platform/stellaris/rules.mk
+++ b/platform/stellaris/rules.mk
@@ -10,8 +10,10 @@ ifeq ($(STELLARIS_CHIP),LM4F120H5QR)
MEMSIZE ?= 32768
MEMBASE := 0x20000000
ROMBASE := 0x00000000
-ARM_CPU := cortex-m3
-GLOBAL_DEFINES += TARGET_IS_BLIZZARD_RA1
+ARM_CPU := cortex-m4f
+GLOBAL_DEFINES += \
+ TARGET_IS_BLIZZARD_RA1 \
+ __FPU_PRESENT=1
endif
ifeq ($(STELLARIS_CHIP),LM3S6965)
MEMSIZE ?= 65536
diff --git a/platform/stm32f7xx/init.c b/platform/stm32f7xx/init.c
index 012eeb96..e8ef5649 100644
--- a/platform/stm32f7xx/init.c
+++ b/platform/stm32f7xx/init.c
@@ -41,10 +41,6 @@ extern const sdram_config_t target_sdram_config;
void SystemInit(void)
{
- /* FPU settings ------------------------------------------------------------*/
-#if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
- SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2)); /* set CP10 and CP11 Full Access */
-#endif
/* Reset the RCC clock configuration to the default reset state ------------*/
/* Set HSION bit */
RCC->CR |= (uint32_t)0x00000001;
diff --git a/platform/stm32f7xx/rules.mk b/platform/stm32f7xx/rules.mk
index 79ee999e..f040a492 100644
--- a/platform/stm32f7xx/rules.mk
+++ b/platform/stm32f7xx/rules.mk
@@ -10,7 +10,7 @@ MEMBASE ?= 0x20010000
MEMSIZE ?= 0x40000
ARCH := arm
-ARM_CPU := cortex-m7
+ARM_CPU := cortex-m7-fpu-sp-d16
ifeq ($(STM32_CHIP),stm32f746)
GLOBAL_DEFINES += STM32F746xx