diff options
-rw-r--r-- | internal/multi_thread_gemm.h | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/internal/multi_thread_gemm.h b/internal/multi_thread_gemm.h
index be33d5f..0aacddb 100644
--- a/internal/multi_thread_gemm.h
+++ b/internal/multi_thread_gemm.h
@@ -66,6 +66,34 @@ inline int Do256NOPs() { return 0; }
 
 #endif // not GEMMLOWP_ALLOW_INLINE_ASM
 
+inline void WriteBarrier() {  // Store-side barrier chosen per architecture; unknown targets fail to compile.
+#ifdef GEMMLOWP_ARM_32
+  MemoryBarrier();  // Helper defined outside this patch; presumably a full barrier -- confirm.
+#elif defined(GEMMLOWP_ARM_64)
+  asm volatile("dmb ishst" ::: "memory");  // Store-only DMB, inner-shareable domain; "memory" clobber also stops compiler reordering.
+#elif defined(GEMMLOWP_X86)
+  asm volatile("sfence" ::: "memory");  // x86 store fence, with the same compiler-level "memory" clobber.
+#elif defined(__mips__)
+  MemoryBarrier();  // Same helper as the 32-bit ARM branch.
+#else
+#error "Unsupported architecture for WriteBarrier."
+#endif
+}
+
+inline void ReadBarrier() {  // Load-side counterpart of WriteBarrier(), with the same per-architecture dispatch.
+#ifdef GEMMLOWP_ARM_32
+  MemoryBarrier();  // Helper defined outside this patch; presumably a full barrier -- confirm.
+#elif defined(GEMMLOWP_ARM_64)
+  asm volatile("dmb ishld" ::: "memory");  // Load-only DMB, inner-shareable domain; "memory" clobber also stops compiler reordering.
+#elif defined(GEMMLOWP_X86)
+  asm volatile("lfence" ::: "memory");  // x86 load fence, with the same compiler-level "memory" clobber.
+#elif defined(__mips__)
+  MemoryBarrier();  // Same helper as the 32-bit ARM branch.
+#else
+#error "Unsupported architecture for ReadBarrier."
+#endif
+}
+
 // Waits until *var != initial_value.
 //
 // Returns the new value of *var. The guarantee here is that
@@ -255,6 +283,7 @@ class Worker {
       switch (state_to_act_upon) {
         case State::HasWork:
           // Got work to do! So do it, and then revert to 'Ready' state.
+          ReadBarrier();  // Issued before task_ is read; counterpart of the WriteBarrier() added after `task_ = task` below.
           assert(task_);
           task_->Run();
           delete task_;
@@ -280,6 +309,7 @@ class Worker {
     assert(!task_);
     task->local_allocator = &local_allocator_;
     task_ = task;
+    WriteBarrier();  // Issued after the task_ store and before ChangeState(State::HasWork); presumably so a worker that sees HasWork also sees task_.
     assert(state_ == State::Ready);
     ChangeState(State::HasWork);
   }
|