Rebase gemmlowp to 6a2a908 (temp_72223856)

Bug: 70573221
Test: mm
Test: mm and Pixel2 boot
Test: NeuralNetworksTest pass
Change-Id: I8fac98811e9a276d3ff8054167dc45225c04147e
author: Miao Wang <miaowang@google.com> 2017-12-12 14:22:24 -0800
committer: Miao Wang <miaowang@google.com> 2017-12-12 16:14:38 -0800
commit: 1963df9ac4a0424674e72ef5da522b5d830605fd (patch)
tree: efd8fbbe69f13c4057f2cc5a5b1f7852fd57a2ab /profiling
parent: cbcfdf963151219ca77f54657defabde8d845bac (diff)
download: gemmlowp-1963df9ac4a0424674e72ef5da522b5d830605fd.tar.gz
3 files changed, 149 insertions, 66 deletions
diff --git a/profiling/instrumentation.h b/profiling/instrumentation.h
index 51b6525..539076a 100644
--- a/profiling/instrumentation.h
+++ b/profiling/instrumentation.h
@@ -1,4 +1,4 @@
-// Copyright 2015 Google Inc. All Rights Reserved.
+// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -52,15 +52,6 @@ using ::uintptr_t;
 #include <set>
 #endif
 
-// We should always use C++11 thread_local; unfortunately that
-// isn't fully supported on Apple yet.
-#ifdef __APPLE__
-#define GEMMLOWP_THREAD_LOCAL static __thread
-#define GEMMLOWP_USING_OLD_THREAD_LOCAL
-#else
-#define GEMMLOWP_THREAD_LOCAL thread_local
-#endif
-
 namespace gemmlowp {
 
 inline void ReleaseBuildAssertion(bool condition, const char* msg) {
@@ -70,41 +61,42 @@ inline void ReleaseBuildAssertion(bool condition, const char* msg) {
   }
 }
 
-// To be used as template parameter for GlobalLock.
-// GlobalLock<ProfilerLockId> is the profiler global lock:
-// registering threads, starting profiling, finishing profiling, and
-// the profiler itself as it samples threads, all need to lock it.
-struct ProfilerLockId;
-
-// A very plain global lock. Templated in LockId so we can have multiple
-// locks, one for each LockId type.
-template <typename LockId>
-class GlobalLock {
-  static pthread_mutex_t* Mutex() {
-    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+class Mutex {
+ public:
+  Mutex(const Mutex&) = delete;
+  Mutex& operator=(const Mutex&) = delete;
+
+  Mutex() { pthread_mutex_init(&m, NULL); }
+  ~Mutex() { pthread_mutex_destroy(&m); }
+
+  void Lock() { pthread_mutex_lock(&m); }
+  void Unlock() { pthread_mutex_unlock(&m); }
+
+ private:
+  pthread_mutex_t m;
+};
+
+class GlobalMutexes {
+ public:
+  static Mutex* Profiler() {
+    static Mutex m;
     return &m;
   }
 
- public:
-  static void Lock() { pthread_mutex_lock(Mutex()); }
-  static void Unlock() { pthread_mutex_unlock(Mutex()); }
+  static Mutex* EightBitIntGemm() {
+    static Mutex m;
+    return &m;
+  }
 };
 
-// A very simple RAII helper to lock and unlock a GlobalLock
-template <typename LockId>
-struct AutoGlobalLock {
-  AutoGlobalLock() { GlobalLock<LockId>::Lock(); }
-  ~AutoGlobalLock() { GlobalLock<LockId>::Unlock(); }
-};
+// A very simple RAII helper to lock and unlock a Mutex
+struct ScopedLock {
+  ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
+  ~ScopedLock() { _m->Unlock(); }
 
-// MemoryBarrier is purely a compile-time thing; it tells two things
-// to the compiler:
-//   1) It prevents reordering code across it
-//     (thanks to the 'volatile' after 'asm')
-//   2) It requires the compiler to assume that any value previously
-//     read from memory, may have changed. Thus it offers an alternative
-//     to using 'volatile' variables.
-inline void MemoryBarrier() { asm volatile("" ::: "memory"); }
+ private:
+  Mutex* _m;
+};
 
 // Profiling definitions. Two paths: when profiling is enabled,
 // and when profiling is disabled.
@@ -115,34 +107,31 @@ inline void MemoryBarrier() { asm volatile("" ::: "memory"); }
 // contains pointers to literal strings that were manually entered
 // in the instrumented code (see ScopedProfilingLabel).
 struct ProfilingStack {
-  static const std::size_t kMaxSize = 15;
+  static const std::size_t kMaxSize = 14;
   typedef const char* LabelsArrayType[kMaxSize];
   LabelsArrayType labels;
   std::size_t size;
+  Mutex* lock;
 
   ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
 
   void Push(const char* label) {
-    MemoryBarrier();
+    ScopedLock sl(lock);
     ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
     labels[size] = label;
-    MemoryBarrier();
     size++;
-    MemoryBarrier();
   }
 
   void Pop() {
-    MemoryBarrier();
+    ScopedLock sl(lock);
     ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
     size--;
-    MemoryBarrier();
   }
 
   void UpdateTop(const char* new_label) {
-    MemoryBarrier();
+    ScopedLock sl(lock);
     assert(size);
     labels[size - 1] = new_label;
-    MemoryBarrier();
   }
 
   ProfilingStack& operator=(const ProfilingStack& other) {
@@ -174,29 +163,35 @@ struct ThreadInfo {
   ThreadInfo() {
     pthread_key_create(&key, ThreadExitCallback);
     pthread_setspecific(key, this);
+    stack.lock = new Mutex();
   }
 
   static void ThreadExitCallback(void* ptr) {
-    AutoGlobalLock<ProfilerLockId> lock;
+    ScopedLock sl(GlobalMutexes::Profiler());
     ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
     ThreadsUnderProfiling().erase(self);
     pthread_key_delete(self->key);
+    delete self->stack.lock;
   }
 };
 
 inline ThreadInfo& ThreadLocalThreadInfo() {
-#ifdef GEMMLOWP_USING_OLD_THREAD_LOCAL
-  // We're leaking this ThreadInfo structure, because Apple doesn't support
-  // non-trivial constructors or destructors for their __thread type modifier.
-  GEMMLOWP_THREAD_LOCAL ThreadInfo* i = nullptr;
-  if (i == nullptr) {
-    i = new ThreadInfo();
+  static pthread_key_t key;
+  static auto DeleteThreadInfo = [](void* threadInfoPtr) {
+    ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
+    if (threadInfo) {
+      delete threadInfo;
+    }
+  };
+
+  static int key_result = pthread_key_create(&key, DeleteThreadInfo);
+
+  ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
+  if (!threadInfo) {
+    threadInfo = new ThreadInfo();
+    pthread_setspecific(key, threadInfo);
   }
-  return *i;
-#else
-  GEMMLOWP_THREAD_LOCAL ThreadInfo i;
-  return i;
-#endif
+  return *threadInfo;
 }
 
 // ScopedProfilingLabel is how one instruments code for profiling
@@ -221,7 +216,7 @@ class ScopedProfilingLabel {
 
 // To be called once on each thread to be profiled.
 inline void RegisterCurrentThreadForProfiling() {
-  AutoGlobalLock<ProfilerLockId> lock;
+  ScopedLock sl(GlobalMutexes::Profiler());
   ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
 }
 
diff --git a/profiling/profiler.h b/profiling/profiler.h
index a18c036..018da57 100644
--- a/profiling/profiler.h
+++ b/profiling/profiler.h
@@ -1,4 +1,4 @@
-// Copyright 2015 Google Inc. All Rights Reserved.
+// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -306,12 +306,12 @@ inline pthread_t& ProfilerThread() {
 // In the end, the key atomicity property that we are relying on
 // here is that pointers are changed atomically, and the labels
 // are pointers (to literal strings).
-inline void RecordStack(const ThreadInfo* thread, ProfilingStack* dst) {
+inline void RecordStack(ThreadInfo* thread, ProfilingStack* dst) {
+  ScopedLock sl(thread->stack.lock);
   assert(!dst->size);
   while (dst->size < thread->stack.size) {
     dst->labels[dst->size] = thread->stack.labels[dst->size];
     dst->size++;
-    MemoryBarrier();  // thread->stack can change at any time
   }
 }
 
@@ -330,7 +330,7 @@ inline void* ProfilerThreadFunc(void*) {
   while (!ProfilerThreadShouldFinish()) {
     WaitOneProfilerTick();
     {
-      AutoGlobalLock<ProfilerLockId> lock;
+      ScopedLock sl(GlobalMutexes::Profiler());
       for (auto t : ThreadsUnderProfiling()) {
         ProfilingStack s;
         RecordStack(t, &s);
@@ -347,7 +347,7 @@ inline void* ProfilerThreadFunc(void*) {
 
 // Starts recording samples.
 inline void StartProfiling() {
-  AutoGlobalLock<ProfilerLockId> lock;
+  ScopedLock sl(GlobalMutexes::Profiler());
   ReleaseBuildAssertion(!IsProfiling(), "We're already profiling!");
   IsProfiling() = true;
   ProfilerThreadShouldFinish() = false;
@@ -357,7 +357,7 @@ inline void StartProfiling() {
 // Stops recording samples, and prints a profile tree-view on stdout.
 inline void FinishProfiling() {
   {
-    AutoGlobalLock<ProfilerLockId> lock;
+    ScopedLock sl(GlobalMutexes::Profiler());
     ReleaseBuildAssertion(IsProfiling(), "We weren't profiling!");
     // The ProfilerThreadShouldFinish() mechanism here is really naive and bad,
     // as the scary comments below should make clear.
diff --git a/profiling/pthread_everywhere.h b/profiling/pthread_everywhere.h
new file mode 100644
index 0000000..7e12d66
--- /dev/null
+++ b/profiling/pthread_everywhere.h
@@ -0,0 +1,88 @@
+// Copyright 2017 The Gemmlowp Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// pthread_everywhere.h: Either includes <pthread.h> or implements a
+// subset of pthread functionality on top of C++11 <thread> for portability.
+
+#ifndef GEMMLOWP_PROFILING_PTHREAD_EVERYWHERE_H_
+#define GEMMLOWP_PROFILING_PTHREAD_EVERYWHERE_H_
+
+#include "pthread_everywhere.h"
+
+#ifndef _WIN32
+#define GEMMLOWP_USE_PTHREAD
+#endif
+
+#if defined GEMMLOWP_USE_PTHREAD
+#include <pthread.h>
+#else
+// Implement a small subset of pthread on top of C++11 threads.
+// The function signatures differ from true pthread functions in two ways:
+//  - True pthread functions return int error codes, ours return void.
+//    Rationale: the c++11 <thread> equivalent functions return void
+//    and use exceptions to report errors; we don't want to deal with
+//    exceptions in this code, so we couldn't meaningfully return errors
+//    in the polyfill. Also, the gemmlowp code using these pthread functions
+//    never checks their return values anyway.
+//  - True pthread *_create/*_init functions take pointers to 'attribute'
+//    structs; ours take nullptr_t. That is because gemmlowp always passes
+//    nullptr at the moment, so any support we would code for non-null
+//    attribs would be unused.
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+#include <cstddef>
+namespace gemmlowp {
+using pthread_t = std::thread*;
+using pthread_mutex_t = std::mutex*;
+using pthread_cond_t = std::condition_variable*;
+inline void pthread_create(pthread_t* thread, std::nullptr_t, 
+  void *(*start_routine) (void *), void *arg) {
+  *thread = new std::thread(start_routine, arg);
+}
+inline void pthread_join(pthread_t thread, std::nullptr_t) {
+  thread->join();
+}
+inline void pthread_mutex_init(pthread_mutex_t *mutex, std::nullptr_t) {
+  *mutex = new std::mutex;
+}
+inline void pthread_mutex_lock(pthread_mutex_t* mutex) {
+  (*mutex)->lock();
+}
+inline void pthread_mutex_unlock(pthread_mutex_t* mutex) {
+  (*mutex)->unlock();
+}
+inline void pthread_mutex_destroy(pthread_mutex_t *mutex) {
+  delete *mutex;
+}
+inline void pthread_cond_init(pthread_cond_t *cond, std::nullptr_t) {
+  *cond = new std::condition_variable;
+}
+inline void pthread_cond_signal(pthread_cond_t* cond) {
+  (*cond)->notify_one();
+}
+inline void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) {
+  std::unique_lock<std::mutex> lock(**mutex, std::adopt_lock);
+  (*cond)->wait(lock);
+  // Detach the lock from the mutex so that, when we leave this context,
+  // the mutex is not released (the caller still owns it, as with pthreads).
+  lock.release();
+}
+inline void pthread_cond_destroy(pthread_cond_t *cond) {
+  delete *cond;
+}
+}  // end namespace gemmlowp
+#endif
+
+#endif  // GEMMLOWP_PROFILING_PTHREAD_EVERYWHERE_H_
\ No newline at end of file
author	Miao Wang <miaowang@google.com>	2017-12-12 14:22:24 -0800
committer	Miao Wang <miaowang@google.com>	2017-12-12 16:14:38 -0800
commit	1963df9ac4a0424674e72ef5da522b5d830605fd (patch)
tree	efd8fbbe69f13c4057f2cc5a5b1f7852fd57a2ab /profiling
parent	cbcfdf963151219ca77f54657defabde8d845bac (diff)
download	gemmlowp-1963df9ac4a0424674e72ef5da522b5d830605fd.tar.gz