path: root/standalone/release.h
Diffstat (limited to 'standalone/release.h')
-rw-r--r--  standalone/release.h  610
1 file changed, 481 insertions(+), 129 deletions(-)
diff --git a/standalone/release.h b/standalone/release.h
index 293a8bc27ba..9ffc88df4f3 100644
--- a/standalone/release.h
+++ b/standalone/release.h
@@ -11,14 +11,46 @@
#include "common.h"
#include "list.h"
+#include "mem_map.h"
#include "mutex.h"
+#include "thread_annotations.h"
namespace scudo {
+template <typename MemMapT> class RegionReleaseRecorder {
+public:
+ RegionReleaseRecorder(MemMapT *RegionMemMap, uptr Base, uptr Offset = 0)
+ : RegionMemMap(RegionMemMap), Base(Base), Offset(Offset) {}
+
+ uptr getReleasedRangesCount() const { return ReleasedRangesCount; }
+
+ uptr getReleasedBytes() const { return ReleasedBytes; }
+
+ uptr getBase() const { return Base; }
+
+ // Releases the [From, To) range of pages back to the OS. Note that `From`
+ // and `To` are offsets from `Base` + Offset.
+ void releasePageRangeToOS(uptr From, uptr To) {
+ const uptr Size = To - From;
+ RegionMemMap->releasePagesToOS(getBase() + Offset + From, Size);
+ ReleasedRangesCount++;
+ ReleasedBytes += Size;
+ }
+
+private:
+ uptr ReleasedRangesCount = 0;
+ uptr ReleasedBytes = 0;
+ MemMapT *RegionMemMap = nullptr;
+ uptr Base = 0;
+ // The release offset from Base. This is used when we know that the range
+ // right after Base will not be released.
+ uptr Offset = 0;
+};
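As an illustration of how a recorder of this shape is driven, here is a minimal sketch; the MemMapT stand-in, the addresses, and the page counts are hypothetical and not part of this patch.

struct ExampleMemMap {
  // Stand-in for any MemMapT providing releasePagesToOS(Addr, Size).
  void releasePagesToOS(uptr Addr, uptr Size) { (void)Addr; (void)Size; }
};

inline void exampleRegionRelease(ExampleMemMap &MemMap, uptr RegionBase,
                                 uptr PageSize) {
  // Skip the first page after RegionBase by passing it as the Offset.
  RegionReleaseRecorder<ExampleMemMap> Recorder(&MemMap, RegionBase,
                                                /*Offset=*/PageSize);
  Recorder.releasePageRangeToOS(0, 2 * PageSize);            // 2 pages released
  Recorder.releasePageRangeToOS(4 * PageSize, 5 * PageSize); // 1 more page
  // At this point Recorder.getReleasedBytes() == 3 * PageSize and
  // Recorder.getReleasedRangesCount() == 2.
}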
+
class ReleaseRecorder {
public:
- ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr)
- : Base(Base), Data(Data) {}
+ ReleaseRecorder(uptr Base, uptr Offset = 0, MapPlatformData *Data = nullptr)
+ : Base(Base), Offset(Offset), Data(Data) {}
uptr getReleasedRangesCount() const { return ReleasedRangesCount; }
@@ -29,7 +61,7 @@ public:
// Releases [From, To) range of pages back to OS.
void releasePageRangeToOS(uptr From, uptr To) {
const uptr Size = To - From;
- releasePagesToOS(Base, From, Size, Data);
+ releasePagesToOS(Base, From + Offset, Size, Data);
ReleasedRangesCount++;
ReleasedBytes += Size;
}
@@ -37,31 +69,158 @@ public:
private:
uptr ReleasedRangesCount = 0;
uptr ReleasedBytes = 0;
+ // The starting address to release. Note that we may want to combine (Base +
+ // Offset) into a new Base. However, on Fuchsia, Base is retrieved from
+ // `MapPlatformData`, which would not be aware of the offset. Therefore, store
+ // them separately so that this works on all platforms.
uptr Base = 0;
+ // The release offset from Base. This is used when we know that the range
+ // right after Base will not be released.
+ uptr Offset = 0;
MapPlatformData *Data = nullptr;
};
-// A packed array of Counters. Each counter occupies 2^N bits, enough to store
-// counter's MaxValue. Ctor will try to use a static buffer first, and if that
-// fails (the buffer is too small or already locked), will allocate the
+// A buffer pool which holds a fixed number of static buffers for fast buffer
+// allocation. If the request size is greater than `StaticBufferSize`, it'll
+// delegate the allocation to map().
+template <uptr StaticBufferCount, uptr StaticBufferSize> class BufferPool {
+public:
+ // Preserve at least 1 set bit in the `Mask` so that we don't need a zero
+ // check while extracting the least significant set bit from the `Mask`.
+ static_assert(StaticBufferCount < SCUDO_WORDSIZE, "");
+ static_assert(isAligned(StaticBufferSize, SCUDO_CACHE_LINE_SIZE), "");
+
+ // Return a buffer which is at least `BufferSize`.
+ uptr *getBuffer(const uptr BufferSize) {
+ if (UNLIKELY(BufferSize > StaticBufferSize))
+ return getDynamicBuffer(BufferSize);
+
+ uptr index;
+ {
+ // TODO: In general, we expect this operation to be fast, so the
+ // waiting thread won't be put to sleep. The HybridMutex does implement
+ // busy-waiting, but we may want to review the performance and see if
+ // we need an explicit spin lock here.
+ ScopedLock L(Mutex);
+ index = getLeastSignificantSetBitIndex(Mask);
+ if (index < StaticBufferCount)
+ Mask ^= static_cast<uptr>(1) << index;
+ }
+
+ if (index >= StaticBufferCount)
+ return getDynamicBuffer(BufferSize);
+
+ const uptr Offset = index * StaticBufferSize;
+ memset(&RawBuffer[Offset], 0, StaticBufferSize);
+ return &RawBuffer[Offset];
+ }
+
+ void releaseBuffer(uptr *Buffer, const uptr BufferSize) {
+ const uptr index = getStaticBufferIndex(Buffer, BufferSize);
+ if (index < StaticBufferCount) {
+ ScopedLock L(Mutex);
+ DCHECK_EQ((Mask & (static_cast<uptr>(1) << index)), 0U);
+ Mask |= static_cast<uptr>(1) << index;
+ } else {
+ unmap(reinterpret_cast<void *>(Buffer),
+ roundUp(BufferSize, getPageSizeCached()));
+ }
+ }
+
+ bool isStaticBufferTestOnly(uptr *Buffer, uptr BufferSize) {
+ return getStaticBufferIndex(Buffer, BufferSize) < StaticBufferCount;
+ }
+
+private:
+ uptr getStaticBufferIndex(uptr *Buffer, uptr BufferSize) {
+ if (UNLIKELY(BufferSize > StaticBufferSize))
+ return StaticBufferCount;
+
+ const uptr BufferBase = reinterpret_cast<uptr>(Buffer);
+ const uptr RawBufferBase = reinterpret_cast<uptr>(RawBuffer);
+
+ if (BufferBase < RawBufferBase ||
+ BufferBase >= RawBufferBase + sizeof(RawBuffer)) {
+ return StaticBufferCount;
+ }
+
+ DCHECK_LE(BufferSize, StaticBufferSize);
+ DCHECK_LE(BufferBase + BufferSize, RawBufferBase + sizeof(RawBuffer));
+ DCHECK_EQ((BufferBase - RawBufferBase) % StaticBufferSize, 0U);
+
+ const uptr index =
+ (BufferBase - RawBufferBase) / (StaticBufferSize * sizeof(uptr));
+ DCHECK_LT(index, StaticBufferCount);
+ return index;
+ }
+
+ uptr *getDynamicBuffer(const uptr BufferSize) {
+ // When using a heap-based buffer, precommit the pages backing the
+ // Vmar by passing the |MAP_PRECOMMIT| flag. This allows an optimization
+ // where page fault exceptions are skipped as the allocated memory
+ // is accessed. So far, this is only enabled on Fuchsia; it hasn't shown a
+ // performance benefit on other platforms.
+ const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
+ return reinterpret_cast<uptr *>(
+ map(nullptr, roundUp(BufferSize, getPageSizeCached()), "scudo:counters",
+ MmapFlags, &MapData));
+ }
+
+ HybridMutex Mutex;
+ // A '1' bit means the corresponding buffer is free; a '0' bit means it is in use.
+ uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0);
+ uptr RawBuffer[StaticBufferCount * StaticBufferSize] GUARDED_BY(Mutex);
+ [[no_unique_address]] MapPlatformData MapData = {};
+};
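A rough usage sketch for the pool; the sizes below are arbitrary illustration values, not part of this patch.

inline void exampleBufferPoolUse() {
  static BufferPool<2U, 512U> Pool;

  // Small request: expected to be served from one of the static slots.
  uptr *Small = Pool.getBuffer(128U);
  // ... use Small ...
  Pool.releaseBuffer(Small, 128U); // hands the static slot back to the pool

  // Large request: exceeds StaticBufferSize, so it falls back to map().
  uptr *Large = Pool.getBuffer(4096U);
  // ... use Large ...
  Pool.releaseBuffer(Large, 4096U); // unmapped, since it is not a static slot
}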
+
+// A Region page map is used to record the usage of pages in the regions. It
+// implements a packed array of Counters. Each counter occupies 2^N bits, enough
+// to store the counter's MaxValue. The ctor will try a static buffer first, and
+// if that fails (the buffer is too small or already locked), will allocate the
// required Buffer via map(). The caller is expected to check whether the
// initialization was successful by checking the isAllocated() result. For the
// sake of performance, none of the accessors check the validity of the
// arguments; it is assumed that Index is always in the [0, N) range and that
// the value is not incremented past MaxValue.
-class PackedCounterArray {
+class RegionPageMap {
public:
- PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
- uptr MaxValue)
- : Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
- DCHECK_GT(Regions, 0);
- DCHECK_GT(NumCounters, 0);
+ RegionPageMap()
+ : Regions(0),
+ NumCounters(0),
+ CounterSizeBitsLog(0),
+ CounterMask(0),
+ PackingRatioLog(0),
+ BitOffsetMask(0),
+ SizePerRegion(0),
+ BufferSize(0),
+ Buffer(nullptr) {}
+ RegionPageMap(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) {
+ reset(NumberOfRegions, CountersPerRegion, MaxValue);
+ }
+ ~RegionPageMap() {
+ if (!isAllocated())
+ return;
+ Buffers.releaseBuffer(Buffer, BufferSize);
+ Buffer = nullptr;
+ }
+
+ // The lock of `StaticBuffer` is acquired conditionally and there's no easy way
+ // to specify the thread-safety attribute properly with the current code
+ // structure. Besides, this is the only place where we may want to check thread
+ // safety. Therefore, it's fine to bypass the thread-safety analysis for now.
+ void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) {
+ DCHECK_GT(NumberOfRegion, 0);
+ DCHECK_GT(CountersPerRegion, 0);
DCHECK_GT(MaxValue, 0);
+
+ Regions = NumberOfRegion;
+ NumCounters = CountersPerRegion;
+
constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
// Rounding counter storage size up to the power of two allows for using
// bit shifts calculating particular counter's Index and offset.
const uptr CounterSizeBits =
- roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
+ roundUpPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
DCHECK_LE(CounterSizeBits, MaxCounterBits);
CounterSizeBitsLog = getLog2(CounterSizeBits);
CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);
@@ -72,27 +231,10 @@ public:
BitOffsetMask = PackingRatio - 1;
SizePerRegion =
- roundUpTo(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
+ roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
PackingRatioLog;
BufferSize = SizePerRegion * sizeof(*Buffer) * Regions;
- if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
- Mutex.tryLock()) {
- Buffer = &StaticBuffer[0];
- memset(Buffer, 0, BufferSize);
- } else {
- Buffer = reinterpret_cast<uptr *>(
- map(nullptr, roundUpTo(BufferSize, getPageSizeCached()),
- "scudo:counters", MAP_ALLOWNOMEM));
- }
- }
- ~PackedCounterArray() {
- if (!isAllocated())
- return;
- if (Buffer == &StaticBuffer[0])
- Mutex.unlock();
- else
- unmap(reinterpret_cast<void *>(Buffer),
- roundUpTo(BufferSize, getPageSizeCached()));
+ Buffer = Buffers.getBuffer(BufferSize);
}
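// Worked example of the packing math above (illustration, not part of this
// patch, assuming 64-bit words): for MaxValue = 4,
// getMostSignificantSetBitIndex(4) + 1 = 3, rounded up to CounterSizeBits = 4,
// so CounterSizeBitsLog = 2 and CounterMask = 0xF. PackingRatio = 64 >> 2 = 16
// counters per word, hence PackingRatioLog = 4 and BitOffsetMask = 0xF. With
// NumCounters = 100, SizePerRegion = roundUp(100, 16) >> 4 = 7 words per region.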
bool isAllocated() const { return !!Buffer; }
@@ -112,10 +254,22 @@ public:
const uptr Index = I >> PackingRatioLog;
const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog;
DCHECK_LT(BitOffset, SCUDO_WORDSIZE);
+ DCHECK_EQ(isAllCounted(Region, I), false);
Buffer[Region * SizePerRegion + Index] += static_cast<uptr>(1U)
<< BitOffset;
}
+ void incN(uptr Region, uptr I, uptr N) const {
+ DCHECK_GT(N, 0U);
+ DCHECK_LE(N, CounterMask);
+ DCHECK_LE(get(Region, I), CounterMask - N);
+ const uptr Index = I >> PackingRatioLog;
+ const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog;
+ DCHECK_LT(BitOffset, SCUDO_WORDSIZE);
+ DCHECK_EQ(isAllCounted(Region, I), false);
+ Buffer[Region * SizePerRegion + Index] += N << BitOffset;
+ }
+
void incRange(uptr Region, uptr From, uptr To) const {
DCHECK_LE(From, To);
const uptr Top = Min(To + 1, NumCounters);
@@ -123,13 +277,43 @@ public:
inc(Region, I);
}
- uptr getBufferSize() const { return BufferSize; }
+ // Set the counter to the max value. Note that the max number of blocks in a
+ // page may vary. To provide an easier way to tell whether all the blocks on a
+ // page are counted, regardless of the page, set the counter to the same max
+ // value to denote the all-counted status.
+ void setAsAllCounted(uptr Region, uptr I) const {
+ DCHECK_LE(get(Region, I), CounterMask);
+ const uptr Index = I >> PackingRatioLog;
+ const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog;
+ DCHECK_LT(BitOffset, SCUDO_WORDSIZE);
+ Buffer[Region * SizePerRegion + Index] |= CounterMask << BitOffset;
+ }
+ void setAsAllCountedRange(uptr Region, uptr From, uptr To) const {
+ DCHECK_LE(From, To);
+ const uptr Top = Min(To + 1, NumCounters);
+ for (uptr I = From; I < Top; I++)
+ setAsAllCounted(Region, I);
+ }
- static const uptr StaticBufferCount = 2048U;
+ bool updateAsAllCountedIf(uptr Region, uptr I, uptr MaxCount) {
+ const uptr Count = get(Region, I);
+ if (Count == CounterMask)
+ return true;
+ if (Count == MaxCount) {
+ setAsAllCounted(Region, I);
+ return true;
+ }
+ return false;
+ }
+ bool isAllCounted(uptr Region, uptr I) const {
+ return get(Region, I) == CounterMask;
+ }
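+
+ // Illustrative note (not part of this patch, assuming 4-bit counters, i.e.
+ // CounterMask = 0xF): a page that can hold at most 3 blocks is flagged fully
+ // free by writing 0xF rather than 3, so isAllCounted() can test one sentinel
+ // value regardless of how many blocks actually fit on that particular page.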
+
+ uptr getBufferSize() const { return BufferSize; }
private:
- const uptr Regions;
- const uptr NumCounters;
+ uptr Regions;
+ uptr NumCounters;
uptr CounterSizeBitsLog;
uptr CounterMask;
uptr PackingRatioLog;
@@ -139,17 +323,20 @@ private:
uptr BufferSize;
uptr *Buffer;
- static HybridMutex Mutex;
- static uptr StaticBuffer[StaticBufferCount];
+ // We may consider making this configurable if there are cases which may
+ // benefit from this.
+ static const uptr StaticBufferCount = 2U;
+ static const uptr StaticBufferSize = 512U;
+ static BufferPool<StaticBufferCount, StaticBufferSize> Buffers;
};
template <class ReleaseRecorderT> class FreePagesRangeTracker {
public:
- explicit FreePagesRangeTracker(ReleaseRecorderT *Recorder)
+ explicit FreePagesRangeTracker(ReleaseRecorderT &Recorder)
: Recorder(Recorder), PageSizeLog(getLog2(getPageSizeCached())) {}
- void processNextPage(bool Freed) {
- if (Freed) {
+ void processNextPage(bool Released) {
+ if (Released) {
if (!InRange) {
CurrentRangeStatePage = CurrentPage;
InRange = true;
@@ -170,113 +357,270 @@ public:
private:
void closeOpenedRange() {
if (InRange) {
- Recorder->releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog),
- (CurrentPage << PageSizeLog));
+ Recorder.releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog),
+ (CurrentPage << PageSizeLog));
InRange = false;
}
}
- ReleaseRecorderT *const Recorder;
+ ReleaseRecorderT &Recorder;
const uptr PageSizeLog;
bool InRange = false;
uptr CurrentPage = 0;
uptr CurrentRangeStatePage = 0;
};
-template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT,
- typename SkipRegionT>
-NOINLINE void
-releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList,
- uptr RegionSize, uptr NumberOfRegions, uptr BlockSize,
- ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr,
- SkipRegionT SkipRegion) {
- const uptr PageSize = getPageSizeCached();
-
- // Figure out the number of chunks per page and whether we can take a fast
- // path (the number of chunks per page is the same for all pages).
- uptr FullPagesBlockCountMax;
- bool SameBlockCountPerPage;
- if (BlockSize <= PageSize) {
- if (PageSize % BlockSize == 0) {
- // Same number of chunks per page, no cross overs.
- FullPagesBlockCountMax = PageSize / BlockSize;
- SameBlockCountPerPage = true;
- } else if (BlockSize % (PageSize % BlockSize) == 0) {
- // Some chunks are crossing page boundaries, which means that the page
- // contains one or two partial chunks, but all pages contain the same
- // number of chunks.
- FullPagesBlockCountMax = PageSize / BlockSize + 1;
- SameBlockCountPerPage = true;
+struct PageReleaseContext {
+ PageReleaseContext(uptr BlockSize, uptr NumberOfRegions, uptr ReleaseSize,
+ uptr ReleaseOffset = 0)
+ : BlockSize(BlockSize), NumberOfRegions(NumberOfRegions) {
+ PageSize = getPageSizeCached();
+ if (BlockSize <= PageSize) {
+ if (PageSize % BlockSize == 0) {
+ // Same number of chunks per page, no cross overs.
+ FullPagesBlockCountMax = PageSize / BlockSize;
+ SameBlockCountPerPage = true;
+ } else if (BlockSize % (PageSize % BlockSize) == 0) {
+ // Some chunks are crossing page boundaries, which means that the page
+ // contains one or two partial chunks, but all pages contain the same
+ // number of chunks.
+ FullPagesBlockCountMax = PageSize / BlockSize + 1;
+ SameBlockCountPerPage = true;
+ } else {
+ // Some chunks are crossing page boundaries, which means that the page
+ // contains one or two partial chunks.
+ FullPagesBlockCountMax = PageSize / BlockSize + 2;
+ SameBlockCountPerPage = false;
+ }
} else {
- // Some chunks are crossing page boundaries, which means that the page
- // contains one or two partial chunks.
- FullPagesBlockCountMax = PageSize / BlockSize + 2;
- SameBlockCountPerPage = false;
+ if (BlockSize % PageSize == 0) {
+ // One chunk covers multiple pages, no cross overs.
+ FullPagesBlockCountMax = 1;
+ SameBlockCountPerPage = true;
+ } else {
+ // One chunk covers multiple pages, Some chunks are crossing page
+ // boundaries. Some pages contain one chunk, some contain two.
+ FullPagesBlockCountMax = 2;
+ SameBlockCountPerPage = false;
+ }
}
- } else {
- if (BlockSize % PageSize == 0) {
- // One chunk covers multiple pages, no cross overs.
- FullPagesBlockCountMax = 1;
- SameBlockCountPerPage = true;
+
+ // TODO: For multiple regions, it's more complicated to support partial
+ // region marking (which includes the complexity of how to handle the last
+ // block in a region). We may consider this after markFreeBlocks() accepts
+ // only free blocks from the same region.
+ if (NumberOfRegions != 1)
+ DCHECK_EQ(ReleaseOffset, 0U);
+
+ PagesCount = roundUp(ReleaseSize, PageSize) / PageSize;
+ PageSizeLog = getLog2(PageSize);
+ ReleasePageOffset = ReleaseOffset >> PageSizeLog;
+ }
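+
+ // Worked example of the classification above (illustration, not part of this
+ // patch), with PageSize = 4096:
+ //   BlockSize = 1024: 4096 % 1024 == 0, FullPagesBlockCountMax = 4, same
+ //     block count on every page.
+ //   BlockSize = 3072: 4096 % 3072 == 1024 and 3072 % 1024 == 0,
+ //     FullPagesBlockCountMax = 4096 / 3072 + 1 = 2, same count per page.
+ //   BlockSize = 2560: neither condition holds, FullPagesBlockCountMax =
+ //     4096 / 2560 + 2 = 3, and the count varies per page.
+ //   BlockSize = 8192: 8192 % 4096 == 0, FullPagesBlockCountMax = 1, same
+ //     count per page.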
+
+ // PageMap is lazily allocated when markFreeBlocks() is invoked.
+ bool hasBlockMarked() const {
+ return PageMap.isAllocated();
+ }
+
+ bool ensurePageMapAllocated() {
+ if (PageMap.isAllocated())
+ return true;
+ PageMap.reset(NumberOfRegions, PagesCount, FullPagesBlockCountMax);
+ // TODO: Log a message when the PageMap allocation fails.
+ return PageMap.isAllocated();
+ }
+
+ // Mark all the blocks in the given range [From, To). Instead of visiting all
+ // the blocks, we will just mark the pages as all counted. Note that `From` and
+ // `To` have to be page aligned, with one exception: if `To` is equal to the
+ // RegionSize, it does not need to be page aligned.
+ bool markRangeAsAllCounted(uptr From, uptr To, uptr Base,
+ const uptr RegionIndex, const uptr RegionSize) {
+ DCHECK_LT(From, To);
+ DCHECK_LE(To, Base + RegionSize);
+ DCHECK_EQ(From % PageSize, 0U);
+ DCHECK_LE(To - From, RegionSize);
+
+ if (!ensurePageMapAllocated())
+ return false;
+
+ uptr FromInRegion = From - Base;
+ uptr ToInRegion = To - Base;
+ uptr FirstBlockInRange = roundUpSlow(FromInRegion, BlockSize);
+
+ // The straddling block sits across the entire range.
+ if (FirstBlockInRange >= ToInRegion)
+ return true;
+
+ // The first block may not sit at the first page in the range; move
+ // `FromInRegion` to the page where the first block starts.
+ FromInRegion = roundDown(FirstBlockInRange, PageSize);
+
+ // When the first block is not aligned to the range boundary, there is a
+ // block sitting across `From`, which looks like this:
+ //
+ // From To
+ // V V
+ // +-----------------------------------------------+
+ // +-----+-----+-----+-----+
+ // | | | | | ...
+ // +-----+-----+-----+-----+
+ // |- first page -||- second page -||- ...
+ //
+ // Therefore, we can't just mark the first page as all counted. Instead, we
+ // increment the number of blocks for the first page in the page map and
+ // then round `From` up to the next page.
+ if (FirstBlockInRange != FromInRegion) {
+ DCHECK_GT(FromInRegion + PageSize, FirstBlockInRange);
+ uptr NumBlocksInFirstPage =
+ (FromInRegion + PageSize - FirstBlockInRange + BlockSize - 1) /
+ BlockSize;
+ PageMap.incN(RegionIndex, getPageIndex(FromInRegion),
+ NumBlocksInFirstPage);
+ FromInRegion = roundUp(FromInRegion + 1, PageSize);
+ }
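+
+ // Worked numbers for the step above (illustration, not part of this patch):
+ // with PageSize = 4096, BlockSize = 1000 and FromInRegion = 4096,
+ // FirstBlockInRange = roundUpSlow(4096, 1000) = 5000, so the block at 4000
+ // straddles `From`. NumBlocksInFirstPage = (4096 + 4096 - 5000 + 999) / 1000
+ // = 4 (the blocks at 5000, 6000, 7000 plus the straddler at 8000), and
+ // FromInRegion then advances to roundUp(4097, 4096) = 8192.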
+
+ uptr LastBlockInRange = roundDownSlow(ToInRegion - 1, BlockSize);
+
+ // Note that LastBlockInRange may be smaller than `FromInRegion` at this
+ // point because the range may contain only one block.
+
+ // When the last block sits across `To`, we can't just mark the pages
+ // occupied by the last block as all counted. Instead, we increment the
+ // counters of those pages by 1. The exception is that if it's the last
+ // block in the region, it's fine to mark those pages as all counted.
+ if (LastBlockInRange + BlockSize != RegionSize) {
+ DCHECK_EQ(ToInRegion % PageSize, 0U);
+ // The case below is like,
+ //
+ // From To
+ // V V
+ // +----------------------------------------+
+ // +-----+-----+-----+-----+
+ // | | | | | ...
+ // +-----+-----+-----+-----+
+ // ... -||- last page -||- next page -|
+ //
+ // The last block is not aligned to `To`; we need to increment the
+ // counter of the `next page` by 1.
+ if (LastBlockInRange + BlockSize != ToInRegion) {
+ PageMap.incRange(RegionIndex, getPageIndex(ToInRegion),
+ getPageIndex(LastBlockInRange + BlockSize - 1));
+ }
} else {
- // One chunk covers multiple pages, Some chunks are crossing page
- // boundaries. Some pages contain one chunk, some contain two.
- FullPagesBlockCountMax = 2;
- SameBlockCountPerPage = false;
+ ToInRegion = RegionSize;
+ }
+
+ // After handling the first page and the last block, it's safe to mark all the
+ // remaining pages in the range [From, To) as all counted.
+ if (FromInRegion < ToInRegion) {
+ PageMap.setAsAllCountedRange(RegionIndex, getPageIndex(FromInRegion),
+ getPageIndex(ToInRegion - 1));
}
+
+ return true;
}
- const uptr PagesCount = roundUpTo(RegionSize, PageSize) / PageSize;
- PackedCounterArray Counters(NumberOfRegions, PagesCount,
- FullPagesBlockCountMax);
- if (!Counters.isAllocated())
- return;
-
- const uptr PageSizeLog = getLog2(PageSize);
- const uptr RoundedRegionSize = PagesCount << PageSizeLog;
- const uptr RoundedSize = NumberOfRegions * RoundedRegionSize;
-
- // Iterate over free chunks and count how many free chunks affect each
- // allocated page.
- if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
- // Each chunk affects one page only.
- for (const auto &It : FreeList) {
- for (u32 I = 0; I < It.getCount(); I++) {
- const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase();
- if (P >= RoundedSize)
- continue;
- const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
- const uptr PInRegion = P - RegionIndex * RegionSize;
- Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
+ template <class TransferBatchT, typename DecompactPtrT>
+ bool markFreeBlocksInRegion(const IntrusiveList<TransferBatchT> &FreeList,
+ DecompactPtrT DecompactPtr, const uptr Base,
+ const uptr RegionIndex, const uptr RegionSize,
+ bool MayContainLastBlockInRegion) {
+ if (!ensurePageMapAllocated())
+ return false;
+
+ if (MayContainLastBlockInRegion) {
+ const uptr LastBlockInRegion =
+ ((RegionSize / BlockSize) - 1U) * BlockSize;
+ // The last block in a region may not use the entire page, so we mark the
+ // following "pretend" memory block(s) as free in advance.
+ //
+ // Region Boundary
+ // v
+ // -----+-----------------------+
+ // | Last Page | <- Rounded Region Boundary
+ // -----+-----------------------+
+ // |-----||- trailing blocks -|
+ // ^
+ // last block
+ const uptr RoundedRegionSize = roundUp(RegionSize, PageSize);
+ const uptr TrailingBlockBase = LastBlockInRegion + BlockSize;
+ // If the difference between `RoundedRegionSize` and
+ // `TrailingBlockBase` is larger than a page, that implies the reported
+ // `RegionSize` may not be accurate.
+ DCHECK_LT(RoundedRegionSize - TrailingBlockBase, PageSize);
+
+ // Only the last page touched by the last block needs the trailing blocks
+ // marked. Note that if the last "pretend" block straddles the region
+ // boundary, we still have to count it in so that the logic of counting the
+ // number of blocks on a page stays consistent.
+ uptr NumTrailingBlocks =
+ (roundUpSlow(RoundedRegionSize - TrailingBlockBase, BlockSize) +
+ BlockSize - 1) /
+ BlockSize;
+ if (NumTrailingBlocks > 0) {
+ PageMap.incN(RegionIndex, getPageIndex(TrailingBlockBase),
+ NumTrailingBlocks);
}
}
- } else {
- // In all other cases chunks might affect more than one page.
- DCHECK_GE(RegionSize, BlockSize);
- const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
- for (const auto &It : FreeList) {
- for (u32 I = 0; I < It.getCount(); I++) {
- const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase();
- if (P >= RoundedSize)
- continue;
- const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
- uptr PInRegion = P - RegionIndex * RegionSize;
- Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
- (PInRegion + BlockSize - 1) >> PageSizeLog);
- // The last block in a region might straddle a page, so if it's
- // free, we mark the following "pretend" memory block(s) as free.
- if (PInRegion == LastBlockInRegion) {
- PInRegion += BlockSize;
- while (PInRegion < RoundedRegionSize) {
- Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
- (PInRegion + BlockSize - 1) >> PageSizeLog);
- PInRegion += BlockSize;
- }
+
+ // Iterate over free chunks and count how many free chunks affect each
+ // allocated page.
+ if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
+ // Each chunk affects one page only.
+ for (const auto &It : FreeList) {
+ for (u16 I = 0; I < It.getCount(); I++) {
+ const uptr PInRegion = DecompactPtr(It.get(I)) - Base;
+ DCHECK_LT(PInRegion, RegionSize);
+ PageMap.inc(RegionIndex, getPageIndex(PInRegion));
+ }
+ }
+ } else {
+ // In all other cases chunks might affect more than one page.
+ DCHECK_GE(RegionSize, BlockSize);
+ for (const auto &It : FreeList) {
+ for (u16 I = 0; I < It.getCount(); I++) {
+ const uptr PInRegion = DecompactPtr(It.get(I)) - Base;
+ PageMap.incRange(RegionIndex, getPageIndex(PInRegion),
+ getPageIndex(PInRegion + BlockSize - 1));
}
}
}
+
+ return true;
}
+ uptr getPageIndex(uptr P) { return (P >> PageSizeLog) - ReleasePageOffset; }
+
+ uptr BlockSize;
+ uptr NumberOfRegions;
+ // For partial region marking, the pages at the front of the region do not
+ // need to be counted.
+ uptr ReleasePageOffset;
+ uptr PageSize;
+ uptr PagesCount;
+ uptr PageSizeLog;
+ uptr FullPagesBlockCountMax;
+ bool SameBlockCountPerPage;
+ RegionPageMap PageMap;
+};
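Where a caller already knows that an entire, page-aligned sub-range of a region holds nothing but free blocks, it can mark that range wholesale instead of walking a free list. A hypothetical call (illustrative bounds, not part of this patch) could look like:

inline bool exampleMarkRange(PageReleaseContext &Context, uptr RegionBase,
                             uptr RegionSize, uptr PageSize) {
  // Mark pages [1, 17) of the region as containing only free blocks. Both
  // bounds are page aligned, as markRangeAsAllCounted() expects.
  return Context.markRangeAsAllCounted(RegionBase + PageSize,
                                       RegionBase + 17 * PageSize, RegionBase,
                                       /*RegionIndex=*/0U, RegionSize);
}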
+
+// Try to release the pages which don't contain any in-use blocks, i.e., all
+// the blocks in them are free. The `PageMap` records the number of free
+// blocks on each page.
+template <class ReleaseRecorderT, typename SkipRegionT>
+NOINLINE void
+releaseFreeMemoryToOS(PageReleaseContext &Context,
+ ReleaseRecorderT &Recorder, SkipRegionT SkipRegion) {
+ const uptr PageSize = Context.PageSize;
+ const uptr BlockSize = Context.BlockSize;
+ const uptr PagesCount = Context.PagesCount;
+ const uptr NumberOfRegions = Context.NumberOfRegions;
+ const uptr ReleasePageOffset = Context.ReleasePageOffset;
+ const uptr FullPagesBlockCountMax = Context.FullPagesBlockCountMax;
+ const bool SameBlockCountPerPage = Context.SameBlockCountPerPage;
+ RegionPageMap &PageMap = Context.PageMap;
+
// Iterate over pages detecting ranges of pages with chunk Counters equal
// to the expected number of chunks for the particular page.
FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder);
@@ -287,9 +631,11 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList,
RangeTracker.skipPages(PagesCount);
continue;
}
- for (uptr J = 0; J < PagesCount; J++)
- RangeTracker.processNextPage(Counters.get(I, J) ==
- FullPagesBlockCountMax);
+ for (uptr J = 0; J < PagesCount; J++) {
+ const bool CanRelease =
+ PageMap.updateAsAllCountedIf(I, J, FullPagesBlockCountMax);
+ RangeTracker.processNextPage(CanRelease);
+ }
}
} else {
// Slow path, go through the pages keeping count how many chunks affect
@@ -308,6 +654,10 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList,
}
uptr PrevPageBoundary = 0;
uptr CurrentBoundary = 0;
+ if (ReleasePageOffset > 0) {
+ PrevPageBoundary = ReleasePageOffset * PageSize;
+ CurrentBoundary = roundUpSlow(PrevPageBoundary, BlockSize);
+ }
for (uptr J = 0; J < PagesCount; J++) {
const uptr PageBoundary = PrevPageBoundary + PageSize;
uptr BlocksPerPage = Pn;
@@ -321,7 +671,9 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList,
}
}
PrevPageBoundary = PageBoundary;
- RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
+ const bool CanRelease =
+ PageMap.updateAsAllCountedIf(I, J, BlocksPerPage);
+ RangeTracker.processNextPage(CanRelease);
}
}
}
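Finally, a rough end-to-end sketch of how a primary allocator might drive these pieces together; the free-list type, the identity decompaction, and the single-region parameters are assumptions for illustration, not part of this patch.

template <class TransferBatchT>
void exampleMarkAndRelease(const IntrusiveList<TransferBatchT> &FreeList,
                           uptr RegionBase, uptr RegionSize, uptr BlockSize) {
  PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U,
                             /*ReleaseSize=*/RegionSize);
  // Identity decompaction: assume the free list stores raw addresses here.
  auto DecompactPtr = [](uptr CompactPtr) { return CompactPtr; };
  if (!Context.markFreeBlocksInRegion(FreeList, DecompactPtr, RegionBase,
                                      /*RegionIndex=*/0U, RegionSize,
                                      /*MayContainLastBlockInRegion=*/true))
    return; // PageMap allocation failed; skip this release attempt.

  ReleaseRecorder Recorder(RegionBase);
  auto SkipRegion = [](uptr RegionIndex) {
    (void)RegionIndex;
    return false; // Single region, never skipped.
  };
  releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
  // Recorder.getReleasedBytes() now reflects what was returned to the OS.
}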