aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenoit Jacob <benoitjacob@google.com>2020-05-06 13:27:04 -0700
committerCopybara-Service <copybara-worker@google.com>2020-05-06 13:27:24 -0700
commit210c986be9454628fb5a17442380533bf5f39fa8 (patch)
tree0832d95990cc684ea917ccc5b3b177baaf0d7373
parent1b313682ef8b8fc8ed08719c610d1c3503b016bf (diff)
downloadruy-210c986be9454628fb5a17442380533bf5f39fa8.tar.gz
Move SystemAligned{Alloc,Free} functions to their own library as they are used independently of Allocator.
Merge detail::AlignedAllocator into Allocator, I didn't see a need to keep separate classes. Move Allocator method implementations to allocator.cc. Remove ToVoidPtr, it only had a single use. It was also possible to do without a reinterpret_cast, as the language guarantees sizeof(char)==1 and pointer casts to/from void* don't need reinterpret_cast. Trim dependencies of :prepacked_cache. PiperOrigin-RevId: 310216377
-rw-r--r--ruy/BUILD18
-rw-r--r--ruy/allocator.cc57
-rw-r--r--ruy/allocator.h155
-rw-r--r--ruy/prepacked_cache.cc2
-rw-r--r--ruy/system_aligned_alloc.cc51
-rw-r--r--ruy/system_aligned_alloc.h53
6 files changed, 189 insertions, 147 deletions
diff --git a/ruy/BUILD b/ruy/BUILD
index 174ad1f..95b2dd5 100644
--- a/ruy/BUILD
+++ b/ruy/BUILD
@@ -121,6 +121,17 @@ cc_library(
)
cc_library(
+ name = "system_aligned_alloc",
+ srcs = [
+ "system_aligned_alloc.cc",
+ ],
+ hdrs = [
+ "system_aligned_alloc.h",
+ ],
+ copts = ruy_copts_base(),
+)
+
+cc_library(
name = "prepacked_cache",
srcs = [
"prepacked_cache.cc",
@@ -130,11 +141,8 @@ cc_library(
],
copts = ruy_copts_base(),
deps = [
- ":allocator",
":mat",
- ":opt_set",
- ":platform",
- ":time",
+ ":system_aligned_alloc",
"//ruy/profiler:instrumentation",
],
)
@@ -182,8 +190,8 @@ cc_library(
],
copts = ruy_copts_base(),
deps = [
- ":check_macros",
":size_util",
+ ":system_aligned_alloc",
],
)
diff --git a/ruy/allocator.cc b/ruy/allocator.cc
index d8fb738..7fdf73a 100644
--- a/ruy/allocator.cc
+++ b/ruy/allocator.cc
@@ -15,37 +15,44 @@ limitations under the License.
#include "ruy/allocator.h"
-#include <cstdint>
-#include <cstdlib>
-
-#ifdef _WIN32
-#include <malloc.h>
-#endif
+#include "ruy/system_aligned_alloc.h"
namespace ruy {
-namespace detail {
-
-void *SystemAlignedAlloc(std::ptrdiff_t num_bytes) {
-#ifdef _WIN32
- return _aligned_malloc(num_bytes, kMinimumBlockAlignment);
-#else
- void *ptr;
- if (posix_memalign(&ptr, kMinimumBlockAlignment, num_bytes)) {
- return nullptr;
- }
- return ptr;
-#endif
+Allocator::~Allocator() {
+ FreeAll();
+ detail::SystemAlignedFree(ptr_);
}
-void SystemAlignedFree(void *ptr) {
-#ifdef _WIN32
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
+void* Allocator::AllocateSlow(std::ptrdiff_t num_bytes) {
+ void* p = detail::SystemAlignedAlloc(num_bytes);
+ fallback_blocks_total_size_ += num_bytes;
+ fallback_blocks_.push_back(p);
+ return p;
}
-} // namespace detail
+void Allocator::FreeAll() {
+ current_ = 0;
+ if (fallback_blocks_.empty()) {
+ return;
+ }
+
+ // No rounding-up of the size means linear instead of logarithmic
+ // bound on the number of allocations in some worst-case calling patterns.
+ // This is considered worth it because minimizing memory usage is important
+ // and actual calling patterns in applications that we care about still
+ // reach the no-further-allocations steady state in a small finite number
+ // of iterations.
+ std::ptrdiff_t new_size = size_ + fallback_blocks_total_size_;
+ detail::SystemAlignedFree(ptr_);
+ ptr_ = detail::SystemAlignedAlloc(new_size);
+ size_ = new_size;
+
+ for (void* p : fallback_blocks_) {
+ detail::SystemAlignedFree(p);
+ }
+ fallback_blocks_.clear();
+ fallback_blocks_total_size_ = 0;
+}
} // namespace ruy
diff --git a/ruy/allocator.h b/ruy/allocator.h
index 2df0a22..20cc7c3 100644
--- a/ruy/allocator.h
+++ b/ruy/allocator.h
@@ -21,50 +21,18 @@ limitations under the License.
#include <memory>
#include <vector>
-#include "ruy/check_macros.h"
#include "ruy/size_util.h"
+#include "ruy/system_aligned_alloc.h"
namespace ruy {
-namespace detail {
-
-inline void* VoidPtrAdd(void* p, std::ptrdiff_t offset) {
- RUY_DCHECK(p);
- std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(p) + offset;
- return reinterpret_cast<void*>(addr);
-}
-
-// Minimum alignment for blocks.
-//
-// Considerations:
-// - This needs to be at least the alignment of any usual data type.
-// - It's useful that this is at least the size of a cache line to limit
-// possible cache side effects (if only on performance behavior).
-// - It's useful that this is at least the size of SIMD registers, as
-// some SIMD instruction sets have at least performance behavior
-// differences (e.g. NEON) or even different requirements (e.g. SSE)
-// based on that.
-// - It's useful that this is at least the size of an "exclusive reservation
-// granule" on ARM, meaning that if we use this Allocator to allocate
-// an atomic variable, there will be no side effects from other things
-// contending for exclusive/atomic memory accesses to it. While the
-// ARM reference manual mentions that this granule size may be as large
-// as 2048 bytes, in practice we observe it to be 64 bytes. It can
-// be queried cheaply, at runtime, from userspace, if needed.
-static constexpr std::ptrdiff_t kMinimumBlockAlignment = 64;
-
-// Primitive allocation functions obtaining aligned memory from the
-// operating system.
-void* SystemAlignedAlloc(std::ptrdiff_t num_bytes);
-void SystemAlignedFree(void* ptr);
-
// Specialized allocator designed to converge to a steady-state where all
// allocations are bump-ptr allocations from an already-allocated buffer.
//
// To support these constraints, this allocator only supports two
// operations.
-// - AllocateAlignedBytes: allocates a pointer to storage of a specified
-// size, which must be aligned to kMinimumBlockAlignment.
+// - AllocateBytes/Allocate<Pointer>: allocates a pointer to storage of a
+// specified size, which will be aligned to kMinimumBlockAlignment.
// - FreeAll: frees all previous allocations (but retains the internal
// buffer to minimize future calls into the system allocator).
//
@@ -75,79 +43,59 @@ void SystemAlignedFree(void* ptr);
// SystemAlignedAlloc/SystemAlignedFree.
//
// All operations happen on aligned blocks for simplicity.
-class AlignedAllocator {
+//
+// Theory of operation:
+//
+// - ptr_, current_, and size_ implement a basic bump-ptr allocator.
+//
+// - in AllocateBytes, the fast path is just a bump-ptr
+// allocation. If our bump-ptr allocator doesn't have enough space for an
+// allocation, then we allocate a block from the system allocator to
+// service the allocation request. We save that block in fallback_blocks_
+// and track the total size of the fallback blocks in
+// fallback_blocks_total_size_.
+//
+// - in FreeAll, the fast path just resets the bump-ptr allocator. If
+// there are any fallback blocks, we free them and reallocate the
+// bump-ptr allocator's buffer so that the next sequence of allocations
+// will hopefully not need any fallback blocks.
+class Allocator final {
public:
- void operator=(const AlignedAllocator&) = delete;
- ~AlignedAllocator() {
- FreeAll();
- SystemAlignedFree(ptr_);
- }
+ ~Allocator();
- void* AllocateAlignedBytes(std::ptrdiff_t num_bytes) {
- RUY_DCHECK_GT(num_bytes, 0);
- RUY_DCHECK((num_bytes & (kMinimumBlockAlignment - 1)) == 0);
- if (void* p = AllocateFast(num_bytes)) {
+ void* AllocateBytes(std::ptrdiff_t num_bytes) {
+ if (num_bytes == 0) {
+ return nullptr;
+ }
+ const std::ptrdiff_t rounded_num_bytes =
+ round_up_pot(num_bytes, detail::kMinimumBlockAlignment);
+ if (void* p = AllocateFast(rounded_num_bytes)) {
return p;
}
- return AllocateSlow(num_bytes);
+ return AllocateSlow(rounded_num_bytes);
}
- void FreeAll() {
- current_ = 0;
- if (fallback_blocks_.empty()) {
- return;
- }
-
- // No rounding-up of the size means linear instead of logarithmic
- // bound on the number of allocation in some worst-case calling patterns.
- // This is considered worth it because minimizing memory usage is important
- // and actual calling patterns in applications that we care about still
- // reach the no-further-allocations steady state in a small finite number
- // of iterations.
- std::ptrdiff_t new_size = size_ + fallback_blocks_total_size_;
- SystemAlignedFree(ptr_);
- ptr_ = SystemAlignedAlloc(new_size);
- size_ = new_size;
-
- for (void* p : fallback_blocks_) {
- SystemAlignedFree(p);
- }
- fallback_blocks_.clear();
- fallback_blocks_total_size_ = 0;
+ template <typename Pointer>
+ void Allocate(std::ptrdiff_t count, Pointer* out) {
+ using T = typename std::pointer_traits<Pointer>::element_type;
+ *out = static_cast<T*>(AllocateBytes(count * sizeof(T)));
}
+ void FreeAll();
+
private:
+ void operator=(const Allocator&) = delete;
+ void* AllocateSlow(std::ptrdiff_t num_bytes);
+
void* AllocateFast(std::ptrdiff_t num_bytes) {
if (current_ + num_bytes > size_) {
return nullptr;
}
- void* ret = VoidPtrAdd(ptr_, current_);
+ void* ret = static_cast<char*>(ptr_) + current_;
current_ += num_bytes;
return ret;
}
- void* AllocateSlow(std::ptrdiff_t num_bytes) {
- void* p = SystemAlignedAlloc(num_bytes);
- fallback_blocks_total_size_ += num_bytes;
- fallback_blocks_.push_back(p);
- return p;
- }
-
- // Theory of operation:
- //
- // - ptr_, current_, and size_ implement a basic bump-ptr allocator.
- //
- // - in AllocateAlignedBytes, the fast path is just a bump-ptr
- // allocation. If our bump-ptr allocator doesn't have enough space for an
- // allocation, then we allocate a block from the system allocator to
- // service the allocation request. We save that block in fallback_blocks_
- // and track the total size of the fallback blocks in
- // fallback_blocks_total_size_.
- //
- // - in FreeAll, the fast path just resets the bump-ptr allocator. If
- // there are any fallback blocks, we free them and reallocate the
- // bump-ptr allocator's buffer so that the next sequence of allocations
- // will hopefully not need any fallback blocks.
void* ptr_ = nullptr;
std::ptrdiff_t current_ = 0;
std::ptrdiff_t size_ = 0;
@@ -155,31 +103,6 @@ class AlignedAllocator {
std::ptrdiff_t fallback_blocks_total_size_ = 0;
};
-} // namespace detail
-
-// The main Allocator class, with a convenient interface for allocating a
-// typed buffer.
-class Allocator {
- public:
- void* AllocateBytes(std::ptrdiff_t num_bytes) {
- if (num_bytes == 0) {
- return nullptr;
- }
- return aligned.AllocateAlignedBytes(
- round_up_pot(num_bytes, detail::kMinimumBlockAlignment));
- }
- template <typename Pointer>
- void Allocate(std::ptrdiff_t count, Pointer* out) {
- using T = typename std::pointer_traits<Pointer>::element_type;
- *out = static_cast<T*>(AllocateBytes(count * sizeof(T)));
- }
-
- void FreeAll() { aligned.FreeAll(); }
-
- private:
- detail::AlignedAllocator aligned;
-};
-
} // namespace ruy
#endif // RUY_RUY_ALLOCATOR_H_
diff --git a/ruy/prepacked_cache.cc b/ruy/prepacked_cache.cc
index 025ba7f..ee891cb 100644
--- a/ruy/prepacked_cache.cc
+++ b/ruy/prepacked_cache.cc
@@ -15,9 +15,9 @@ limitations under the License.
#include "ruy/prepacked_cache.h"
-#include "ruy/allocator.h"
#include "ruy/mat.h"
#include "ruy/profiler/instrumentation.h"
+#include "ruy/system_aligned_alloc.h"
namespace ruy {
diff --git a/ruy/system_aligned_alloc.cc b/ruy/system_aligned_alloc.cc
new file mode 100644
index 0000000..7c86691
--- /dev/null
+++ b/ruy/system_aligned_alloc.cc
@@ -0,0 +1,51 @@
+/* Copyright 2019 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ruy/system_aligned_alloc.h"
+
+#include <cstddef>
+#include <cstdlib>
+
+#ifdef _WIN32
+#include <malloc.h>
+#endif
+
+namespace ruy {
+
+namespace detail {
+
+void *SystemAlignedAlloc(std::ptrdiff_t num_bytes) {
+#ifdef _WIN32
+ return _aligned_malloc(num_bytes, kMinimumBlockAlignment);
+#else
+ void *ptr;
+ if (posix_memalign(&ptr, kMinimumBlockAlignment, num_bytes)) {
+ return nullptr;
+ }
+ return ptr;
+#endif
+}
+
+void SystemAlignedFree(void *ptr) {
+#ifdef _WIN32
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+} // namespace detail
+
+} // namespace ruy
diff --git a/ruy/system_aligned_alloc.h b/ruy/system_aligned_alloc.h
new file mode 100644
index 0000000..5604b5c
--- /dev/null
+++ b/ruy/system_aligned_alloc.h
@@ -0,0 +1,53 @@
+/* Copyright 2019 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef RUY_RUY_SYSTEM_ALIGNED_ALLOC_H_
+#define RUY_RUY_SYSTEM_ALIGNED_ALLOC_H_
+
+#include <cstddef>
+
+namespace ruy {
+
+namespace detail {
+
+// Minimum alignment for blocks.
+//
+// Considerations:
+// - This needs to be at least the alignment of any usual data type.
+// - It's useful that this is at least the size of a cache line to limit
+// possible cache side effects (if only on performance behavior).
+// - It's useful that this is at least the size of SIMD registers, as
+// some SIMD instruction sets have at least performance behavior
+// differences (e.g. NEON) or even different requirements (e.g. SSE)
+// based on that.
+// - It's useful that this is at least the size of an "exclusive reservation
+// granule" on ARM, meaning that if we use this Allocator to allocate
+// an atomic variable, there will be no side effects from other things
+// contending for exclusive/atomic memory accesses to it. While the
+// ARM reference manual mentions that this granule size may be as large
+// as 2048 bytes, in practice we observe it to be 64 bytes. It can
+// be queried cheaply, at runtime, from userspace, if needed.
+static constexpr std::ptrdiff_t kMinimumBlockAlignment = 64;
+
+// Primitive allocation functions obtaining aligned memory from the
+// operating system.
+void* SystemAlignedAlloc(std::ptrdiff_t num_bytes);
+void SystemAlignedFree(void* ptr);
+
+} // namespace detail
+
+} // namespace ruy
+
+#endif // RUY_RUY_SYSTEM_ALIGNED_ALLOC_H_