author    ChiaHungDuan <chiahungduan@google.com>  2023-10-09 13:11:55 -0700
committer Copybara-Service <copybara-worker@google.com>  2023-10-09 13:56:07 -0700
commit    8c2b8f43ed3a4002c9fdf190ce1dff758936e777 (patch)
tree      b7a255b89dbe72e1b3bd23b9db8df7aa8f856c8c
parent    8ffa7d6db7e08bd5725d7282776a94c5760a8dca (diff)
[scudo] Make local cache be agnostic to the type of node in freelist (#67379)
This change moves `TransferBatch` and `BatchGroup` out of SizeClassAllocatorLocalCache, making the local cache agnostic to the type of node stored in the freelist. A freelist node can then hold more blocks than the local cache keeps, which will let each node carry more blocks and thereby reduce the memory used by BatchClass, with little performance overhead. Note that this patch does not yet enable that capacity change; it is only the first step of the transition.

GitOrigin-RevId: b9c6737ba7307308ecb6ec4cecc4c07e48e7c141
Change-Id: Ie2873dc0c3313432b7c53b9a904004b0530b1a18
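At a high level, the commit relocates the two node types and re-points every consumer. A minimal compilable sketch of the shape change (the "Old" suffix and the empty bodies are inventions for contrast, not scudo's declarations):

// Before: node types nested inside the cache, so their capacity was
// inherently tied to how many blocks the local cache keeps.
template <class SizeClassAllocator> struct SizeClassAllocatorLocalCacheOld {
  struct TransferBatch { /* capacity bound to the cache's hint */ };
  struct BatchGroup { /* ... */ };
};

// After: free-standing templates in allocator_common.h. The primaries
// instantiate them directly as TransferBatch<ThisT> / BatchGroup<ThisT>,
// so a later patch can grow per-node capacity without touching the cache.
template <class SizeClassAllocator> struct TransferBatch { /* ... */ };
template <class SizeClassAllocator> struct BatchGroup { /* ... */ };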
-rw-r--r--standalone/allocator_common.h85
-rw-r--r--standalone/local_cache.h110
-rw-r--r--standalone/primary32.h41
-rw-r--r--standalone/primary64.h41
-rw-r--r--standalone/tests/primary_test.cpp7
5 files changed, 166 insertions, 118 deletions
diff --git a/standalone/allocator_common.h b/standalone/allocator_common.h
new file mode 100644
index 00000000000..95f4776ac59
--- /dev/null
+++ b/standalone/allocator_common.h
@@ -0,0 +1,85 @@
+//===-- allocator_common.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_COMMON_H_
+#define SCUDO_ALLOCATOR_COMMON_H_
+
+#include "common.h"
+#include "list.h"
+
+namespace scudo {
+
+template <class SizeClassAllocator> struct TransferBatch {
+ typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
+ typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
+
+ static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
+ void setFromArray(CompactPtrT *Array, u16 N) {
+ DCHECK_LE(N, MaxNumCached);
+ Count = N;
+ memcpy(Batch, Array, sizeof(Batch[0]) * Count);
+ }
+ void appendFromArray(CompactPtrT *Array, u16 N) {
+ DCHECK_LE(N, MaxNumCached - Count);
+ memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
+ // u16 will be promoted to int by arithmetic type conversion.
+ Count = static_cast<u16>(Count + N);
+ }
+ void appendFromTransferBatch(TransferBatch *B, u16 N) {
+ DCHECK_LE(N, MaxNumCached - Count);
+ DCHECK_GE(B->Count, N);
+ // Append from the back of `B`.
+ memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+ // u16 will be promoted to int by arithmetic type conversion.
+ Count = static_cast<u16>(Count + N);
+ B->Count = static_cast<u16>(B->Count - N);
+ }
+ void clear() { Count = 0; }
+ void add(CompactPtrT P) {
+ DCHECK_LT(Count, MaxNumCached);
+ Batch[Count++] = P;
+ }
+ void moveToArray(CompactPtrT *Array) {
+ memcpy(Array, Batch, sizeof(Batch[0]) * Count);
+ clear();
+ }
+ u16 getCount() const { return Count; }
+ bool isEmpty() const { return Count == 0U; }
+ CompactPtrT get(u16 I) const {
+ DCHECK_LE(I, Count);
+ return Batch[I];
+ }
+ TransferBatch *Next;
+
+private:
+ CompactPtrT Batch[MaxNumCached];
+ u16 Count;
+};
+
+// A BatchGroup is used to collect blocks. Each group has a group id to
+// identify the group kind of contained blocks.
+template <class SizeClassAllocator> struct BatchGroup {
+ // `Next` is used by IntrusiveList.
+ BatchGroup *Next;
+ // The compact base address of each group
+ uptr CompactPtrGroupBase;
+ // Cache value of SizeClassAllocatorLocalCache::getMaxCached()
+ u16 MaxCachedPerBatch;
+ // Number of blocks pushed into this group. This is an increment-only
+ // counter.
+ uptr PushedBlocks;
+ // This is used to track how many bytes are not in-use since last time we
+ // tried to release pages.
+ uptr BytesInBGAtLastCheckpoint;
+ // Blocks are managed by TransferBatch in a list.
+ SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches;
+};
+
+} // namespace scudo
+
+#endif // SCUDO_ALLOCATOR_COMMON_H_
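The new header can be exercised in isolation with a toy allocator. The compilable sketch below uses StubSizeClassMap and StubAllocator, stand-ins invented for this example, and trims the batch to the calls the cache path relies on; note that moveToArray() drains the batch, unlike the copyToArray() it replaces:

#include <cassert>
#include <cstdint>
#include <cstring>

namespace sketch {

using u16 = std::uint16_t;

// Stand-ins for this example only; scudo's real types are far richer.
struct StubSizeClassMap {
  static constexpr u16 MaxNumCachedHint = 8;
};
struct StubAllocator {
  typedef StubSizeClassMap SizeClassMap;
  typedef std::uint32_t CompactPtrT; // scudo stores compacted pointers
};

// Trimmed copy of the TransferBatch above, enough to show the data flow.
template <class SizeClassAllocator> struct TransferBatch {
  typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
  typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
  static constexpr u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;

  void setFromArray(CompactPtrT *Array, u16 N) {
    assert(N <= MaxNumCached);
    Count = N;
    std::memcpy(Batch, Array, sizeof(Batch[0]) * Count);
  }
  // Copies the entries out and empties the batch in one step.
  void moveToArray(CompactPtrT *Array) {
    std::memcpy(Array, Batch, sizeof(Batch[0]) * Count);
    Count = 0;
  }
  u16 getCount() const { return Count; }

  TransferBatch *Next = nullptr;

private:
  CompactPtrT Batch[MaxNumCached];
  u16 Count = 0;
};

} // namespace sketch

int main() {
  sketch::TransferBatch<sketch::StubAllocator> TB;
  std::uint32_t In[4] = {0x10, 0x20, 0x30, 0x40};
  std::uint32_t Out[4] = {};
  TB.setFromArray(In, 4);
  assert(TB.getCount() == 4);
  TB.moveToArray(Out); // Out now holds the four entries; TB is drained.
  assert(TB.getCount() == 0 && Out[3] == 0x40);
  return 0;
}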
diff --git a/standalone/local_cache.h b/standalone/local_cache.h
index 1095eb5f186..8cb02abd16f 100644
--- a/standalone/local_cache.h
+++ b/standalone/local_cache.h
@@ -22,74 +22,6 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
- struct TransferBatch {
- static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
- void setFromArray(CompactPtrT *Array, u16 N) {
- DCHECK_LE(N, MaxNumCached);
- Count = N;
- memcpy(Batch, Array, sizeof(Batch[0]) * Count);
- }
- void appendFromArray(CompactPtrT *Array, u16 N) {
- DCHECK_LE(N, MaxNumCached - Count);
- memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
- // u16 will be promoted to int by arithmetic type conversion.
- Count = static_cast<u16>(Count + N);
- }
- void appendFromTransferBatch(TransferBatch *B, u16 N) {
- DCHECK_LE(N, MaxNumCached - Count);
- DCHECK_GE(B->Count, N);
- // Append from the back of `B`.
- memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
- // u16 will be promoted to int by arithmetic type conversion.
- Count = static_cast<u16>(Count + N);
- B->Count = static_cast<u16>(B->Count - N);
- }
- void clear() { Count = 0; }
- void add(CompactPtrT P) {
- DCHECK_LT(Count, MaxNumCached);
- Batch[Count++] = P;
- }
- void copyToArray(CompactPtrT *Array) const {
- memcpy(Array, Batch, sizeof(Batch[0]) * Count);
- }
- u16 getCount() const { return Count; }
- bool isEmpty() const { return Count == 0U; }
- CompactPtrT get(u16 I) const {
- DCHECK_LE(I, Count);
- return Batch[I];
- }
- static u16 getMaxCached(uptr Size) {
- return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
- }
- TransferBatch *Next;
-
- private:
- CompactPtrT Batch[MaxNumCached];
- u16 Count;
- };
-
- // A BatchGroup is used to collect blocks. Each group has a group id to
- // identify the group kind of contained blocks.
- struct BatchGroup {
- // `Next` is used by IntrusiveList.
- BatchGroup *Next;
- // The compact base address of each group
- uptr CompactPtrGroupBase;
- // Cache value of TransferBatch::getMaxCached()
- u16 MaxCachedPerBatch;
- // Number of blocks pushed into this group. This is an increment-only
- // counter.
- uptr PushedBlocks;
- // This is used to track how many bytes are not in-use since last time we
- // tried to release pages.
- uptr BytesInBGAtLastCheckpoint;
- // Blocks are managed by TransferBatch in a list.
- SinglyLinkedList<TransferBatch> Batches;
- };
-
- static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
- "BatchGroup uses the same class size as TransferBatch");
-
void init(GlobalStats *S, SizeClassAllocator *A) {
DCHECK(isEmpty());
Stats.init();
@@ -151,7 +83,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
}
void drain() {
- // Drain BatchClassId last as createBatch can refill it.
+ // Drain BatchClassId last as it may be needed while draining normal blocks.
for (uptr I = 0; I < NumClasses; ++I) {
if (I == BatchClassId)
continue;
@@ -163,19 +95,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
DCHECK(isEmpty());
}
- TransferBatch *createBatch(uptr ClassId, void *B) {
- if (ClassId != BatchClassId)
- B = allocate(BatchClassId);
+ void *getBatchClassBlock() {
+ void *B = allocate(BatchClassId);
if (UNLIKELY(!B))
reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
- return reinterpret_cast<TransferBatch *>(B);
- }
-
- BatchGroup *createGroup() {
- void *Ptr = allocate(BatchClassId);
- if (UNLIKELY(!Ptr))
- reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
- return reinterpret_cast<BatchGroup *>(Ptr);
+ return B;
}
LocalStats &getStats() { return Stats; }
@@ -203,6 +127,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
Str->append(" No block is cached.\n");
}
+ static u16 getMaxCached(uptr Size) {
+ return Min(SizeClassMap::MaxNumCachedHint,
+ SizeClassMap::getMaxCachedHint(Size));
+ }
+
private:
static const uptr NumClasses = SizeClassMap::NumClasses;
static const uptr BatchClassId = SizeClassMap::BatchClassId;
@@ -211,7 +140,7 @@ private:
u16 MaxCount;
// Note: ClassSize is zero for the transfer batch.
uptr ClassSize;
- CompactPtrT Chunks[2 * TransferBatch::MaxNumCached];
+ CompactPtrT Chunks[2 * SizeClassMap::MaxNumCachedHint];
};
PerClass PerClassArray[NumClasses] = {};
LocalStats Stats;
@@ -228,7 +157,7 @@ private:
for (uptr I = 0; I < NumClasses; I++) {
PerClass *P = &PerClassArray[I];
const uptr Size = SizeClassAllocator::getSizeByClassId(I);
- P->MaxCount = static_cast<u16>(2 * TransferBatch::getMaxCached(Size));
+ P->MaxCount = static_cast<u16>(2 * getMaxCached(Size));
if (I != BatchClassId) {
P->ClassSize = Size;
} else {
@@ -246,15 +175,14 @@ private:
NOINLINE bool refill(PerClass *C, uptr ClassId) {
initCacheMaybe(C);
- TransferBatch *B = Allocator->popBatch(this, ClassId);
- if (UNLIKELY(!B))
- return false;
- DCHECK_GT(B->getCount(), 0);
- C->Count = B->getCount();
- B->copyToArray(C->Chunks);
- B->clear();
- destroyBatch(ClassId, B);
- return true;
+
+ // TODO(chiahungduan): Pass the max number cached for each size class.
+ const u16 NumBlocksRefilled =
+ Allocator->popBlocks(this, ClassId, C->Chunks);
+ DCHECK_LE(NumBlocksRefilled,
+ getMaxCached(SizeClassAllocator::getSizeByClassId(ClassId)));
+ C->Count += NumBlocksRefilled;
+ return NumBlocksRefilled != 0;
}
NOINLINE void drain(PerClass *C, uptr ClassId) {
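With the node types gone from the cache, refill()'s success criterion changes from "did popBatch() return a batch" to "did popBlocks() write any entries". A self-contained sketch of that contract, using stand-in types rather than scudo's exact signatures:

#include <cstdint>

using u16 = std::uint16_t;
using uptr = std::uintptr_t;
using CompactPtrT = std::uint32_t;

// Stand-in for SizeClassAllocator{32,64}: popBlocks() writes up to an
// implementation-chosen number of compacted pointers into ToArray and
// returns how many it wrote (0 when nothing could be popped).
struct StubAllocator {
  u16 popBlocks(uptr /*ClassId*/, CompactPtrT *ToArray) {
    ToArray[0] = 0x10; // pretend the freelist yielded two blocks
    ToArray[1] = 0x20;
    return 2;
  }
};

struct PerClass {
  u16 Count = 0;
  CompactPtrT Chunks[64];
};

// Mirrors the shape of the rewritten refill(): no TransferBatch handling
// and no destroyBatch(); the node type is now the primary's business.
bool refill(StubAllocator *Allocator, PerClass *C, uptr ClassId) {
  const u16 NumBlocksRefilled = Allocator->popBlocks(ClassId, C->Chunks);
  C->Count = static_cast<u16>(C->Count + NumBlocksRefilled);
  return NumBlocksRefilled != 0;
}

int main() {
  StubAllocator A;
  PerClass C;
  return refill(&A, &C, /*ClassId=*/3) ? 0 : 1; // C.Count == 2 on success
}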
diff --git a/standalone/primary32.h b/standalone/primary32.h
index 533615ad381..e153a3bf871 100644
--- a/standalone/primary32.h
+++ b/standalone/primary32.h
@@ -9,6 +9,7 @@
#ifndef SCUDO_PRIMARY32_H_
#define SCUDO_PRIMARY32_H_
+#include "allocator_common.h"
#include "bytemap.h"
#include "common.h"
#include "list.h"
@@ -53,8 +54,11 @@ public:
"");
typedef SizeClassAllocator32<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
- typedef typename CacheT::TransferBatch TransferBatch;
- typedef typename CacheT::BatchGroup BatchGroup;
+ typedef TransferBatch<ThisT> TransferBatch;
+ typedef BatchGroup<ThisT> BatchGroup;
+
+ static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+ "BatchGroup uses the same class size as TransferBatch");
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
@@ -187,6 +191,21 @@ public:
return BlockSize > PageSize;
}
+ u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray) {
+ TransferBatch *B = popBatch(C, ClassId);
+ if (!B)
+ return 0;
+
+ const u16 Count = B->getCount();
+ DCHECK_GT(Count, 0U);
+ B->moveToArray(ToArray);
+
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, B);
+
+ return Count;
+ }
+
TransferBatch *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -520,8 +539,8 @@ private:
// from `CreateGroup` in `pushBlocksImpl`
BG->PushedBlocks = 1;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
- getSizeByClassId(SizeClassMap::BatchClassId));
+ BG->MaxCachedPerBatch =
+ CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
Sci->FreeListInfo.BlockList.push_front(BG);
}
@@ -600,17 +619,17 @@ private:
DCHECK_GT(Size, 0U);
auto CreateGroup = [&](uptr CompactPtrGroupBase) {
- BatchGroup *BG = C->createGroup();
+ BatchGroup *BG = reinterpret_cast<BatchGroup *>(C->getBatchClassBlock());
BG->Batches.clear();
- TransferBatch *TB = C->createBatch(ClassId, nullptr);
+ TransferBatch *TB =
+ reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
TB->clear();
BG->CompactPtrGroupBase = CompactPtrGroupBase;
BG->Batches.push_front(TB);
BG->PushedBlocks = 0;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch =
- TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+ BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
@@ -625,9 +644,7 @@ private:
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
- CurBatch = C->createBatch(
- ClassId,
- reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+ CurBatch = reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
CurBatch->clear();
Batches.push_front(CurBatch);
UnusedSlots = BG->MaxCachedPerBatch;
@@ -775,7 +792,7 @@ private:
}
const uptr Size = getSizeByClassId(ClassId);
- const u16 MaxCount = TransferBatch::getMaxCached(Size);
+ const u16 MaxCount = CacheT::getMaxCached(Size);
DCHECK_GT(MaxCount, 0U);
// The maximum number of blocks we should carve in the region is dictated
// by the maximum number of batches we want to fill, and the amount of
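A side note on the hunks above: the static_assert moved out of local_cache.h and into each primary because both node kinds are now carved from the same BatchClassId-sized block returned by getBatchClassBlock(). The constraint can be checked standalone; the field layouts below are invented stand-ins, not scudo's actual structs:

#include <cstdint>

// Invented layouts roughly mirroring allocator_common.h; the Batch
// capacity of 12 is arbitrary for the sketch.
struct TransferBatchLike {
  void *Next;
  std::uint32_t Batch[12];
  std::uint16_t Count;
};

struct BatchGroupLike {
  void *Next;
  std::uintptr_t CompactPtrGroupBase;
  std::uint16_t MaxCachedPerBatch;
  std::uintptr_t PushedBlocks;
  std::uintptr_t BytesInBGAtLastCheckpoint;
  void *Batches; // list head in the real code
};

// getBatchClassBlock() hands out one BatchClassId-sized block no matter
// which node type the caller reinterpret_casts it to, so the group must
// not outgrow the batch that sizes the class.
static_assert(sizeof(BatchGroupLike) <= sizeof(TransferBatchLike),
              "BatchGroup uses the same class size as TransferBatch");

int main() { return 0; } // the check above is purely compile-time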
diff --git a/standalone/primary64.h b/standalone/primary64.h
index 6d160b4c64d..7d555684e4e 100644
--- a/standalone/primary64.h
+++ b/standalone/primary64.h
@@ -9,6 +9,7 @@
#ifndef SCUDO_PRIMARY64_H_
#define SCUDO_PRIMARY64_H_
+#include "allocator_common.h"
#include "bytemap.h"
#include "common.h"
#include "list.h"
@@ -55,8 +56,11 @@ public:
static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
typedef SizeClassAllocator64<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
- typedef typename CacheT::TransferBatch TransferBatch;
- typedef typename CacheT::BatchGroup BatchGroup;
+ typedef TransferBatch<ThisT> TransferBatch;
+ typedef BatchGroup<ThisT> BatchGroup;
+
+ static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+ "BatchGroup uses the same class size as TransferBatch");
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
@@ -203,6 +207,21 @@ public:
DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
}
+ u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray) {
+ TransferBatch *B = popBatch(C, ClassId);
+ if (!B)
+ return 0;
+
+ const u16 Count = B->getCount();
+ DCHECK_GT(Count, 0U);
+ B->moveToArray(ToArray);
+
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, B);
+
+ return Count;
+ }
+
TransferBatch *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
RegionInfo *Region = getRegionInfo(ClassId);
@@ -630,8 +649,8 @@ private:
// from `CreateGroup` in `pushBlocksImpl`
BG->PushedBlocks = 1;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
- getSizeByClassId(SizeClassMap::BatchClassId));
+ BG->MaxCachedPerBatch =
+ CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
Region->FreeListInfo.BlockList.push_front(BG);
}
@@ -709,17 +728,17 @@ private:
DCHECK_GT(Size, 0U);
auto CreateGroup = [&](uptr CompactPtrGroupBase) {
- BatchGroup *BG = C->createGroup();
+ BatchGroup *BG = reinterpret_cast<BatchGroup *>(C->getBatchClassBlock());
BG->Batches.clear();
- TransferBatch *TB = C->createBatch(ClassId, nullptr);
+ TransferBatch *TB =
+ reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
TB->clear();
BG->CompactPtrGroupBase = CompactPtrGroupBase;
BG->Batches.push_front(TB);
BG->PushedBlocks = 0;
BG->BytesInBGAtLastCheckpoint = 0;
- BG->MaxCachedPerBatch =
- TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+ BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
return BG;
};
@@ -734,9 +753,7 @@ private:
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
- CurBatch = C->createBatch(
- ClassId,
- reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+ CurBatch = reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
CurBatch->clear();
Batches.push_front(CurBatch);
UnusedSlots = BG->MaxCachedPerBatch;
@@ -867,7 +884,7 @@ private:
RegionInfo *Region)
REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
const uptr Size = getSizeByClassId(ClassId);
- const u16 MaxCount = TransferBatch::getMaxCached(Size);
+ const u16 MaxCount = CacheT::getMaxCached(Size);
const uptr RegionBeg = Region->RegionBeg;
const uptr MappedUser = Region->MemMapInfo.MappedUser;
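A note on the guard shared by both popBlocks() implementations above: scudo records free blocks of BatchClassId self-contained, i.e. a TransferBatch's own memory is one of the blocks it tracks. After moveToArray(), that header has already been handed to the caller as an ordinary block, so deallocating it as a batch would free it twice; hence the deallocation is skipped when ClassId == SizeClassMap::BatchClassId.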
diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp
index 074977ff27e..e368f521bea 100644
--- a/standalone/tests/primary_test.cpp
+++ b/standalone/tests/primary_test.cpp
@@ -207,7 +207,7 @@ struct SmallRegionsConfig {
// For the 32-bit one, it requires actually exhausting memory, so we skip it.
TEST(ScudoPrimaryTest, Primary64OOM) {
using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>;
- using TransferBatch = Primary::CacheT::TransferBatch;
+ using TransferBatch = Primary::TransferBatch;
Primary Allocator;
Allocator.init(/*ReleaseToOsInterval=*/-1);
typename Primary::CacheT Cache;
@@ -233,8 +233,9 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
while (!Batches.empty()) {
TransferBatch *B = Batches.back();
Batches.pop_back();
- B->copyToArray(Blocks);
- Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount());
+ const scudo::u16 Count = B->getCount();
+ B->moveToArray(Blocks);
+ Allocator.pushBlocks(&Cache, ClassId, Blocks, Count);
Cache.deallocate(Primary::SizeClassMap::BatchClassId, B);
}
Cache.destroy(nullptr);
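The test adjustment above follows directly from the copyToArray() -> moveToArray() switch: because moveToArray() clears the batch, B->getCount() must be read into a local before the move, otherwise pushBlocks() would be handed a count of zero.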