diff options
-rw-r--r-- | standalone/primary32.h | 65 | ||||
-rw-r--r-- | standalone/primary64.h | 14 | ||||
-rw-r--r-- | standalone/release.cpp | 2 | ||||
-rw-r--r-- | standalone/release.h | 113 | ||||
-rw-r--r-- | standalone/tests/release_test.cpp | 30 |
5 files changed, 131 insertions, 93 deletions
diff --git a/standalone/primary32.h b/standalone/primary32.h index 262793158b0..7d061e2cbcc 100644 --- a/standalone/primary32.h +++ b/standalone/primary32.h @@ -435,6 +435,18 @@ private: if (BytesPushed < PageSize) return 0; // Nothing new to release. + // Releasing smaller blocks is expensive, so we want to make sure that a + // significant amount of bytes are free, and that there has been a good + // amount of batches pushed to the freelist before attempting to release. + if (BlockSize < PageSize / 16U) { + if (!Force && BytesPushed < Sci->AllocatedUser / 16U) + return 0; + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). + if ((BytesInFreeList * 100U) / Sci->AllocatedUser < + (100U - 1U - BlockSize / 16U)) + return 0; + } + if (!Force) { const s32 IntervalMs = getReleaseToOsIntervalMs(); if (IntervalMs < 0) @@ -446,36 +458,33 @@ private: } } - if (BlockSize < PageSize / 16) { - if (BytesPushed < (Sci->AllocatedUser / 16U)) - return 0; - if (BytesInFreeList / (Sci->AllocatedUser / 100U) < - (100U - getMostSignificantSetBitIndex(BlockSize) * 2)) - return 0; - } - - // TODO(kostyak): currently not ideal as we loop over all regions and - // iterate multiple times over the same freelist if a ClassId spans multiple - // regions. But it will have to do for now. - uptr TotalReleasedBytes = 0; - const uptr MaxSize = (RegionSize / BlockSize) * BlockSize; + DCHECK_GT(MinRegionIndex, 0U); + uptr First = 0; for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { if (PossibleRegions[I] - 1U == ClassId) { - const uptr Region = I * RegionSize; - // If the region is the one currently associated to the size-class, we - // only need to release up to CurrentRegionAllocated, MaxSize otherwise. - const uptr Size = (Region == Sci->CurrentRegion) - ? 
Sci->CurrentRegionAllocated : MaxSize; - ReleaseRecorder Recorder(Region); - releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize, - &Recorder); - if (Recorder.getReleasedRangesCount() > 0) { - Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; - Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); - Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; - } + First = I; + break; + } + } + uptr Last = 0; + for (uptr I = MaxRegionIndex; I >= MinRegionIndex; I--) { + if (PossibleRegions[I] - 1U == ClassId) { + Last = I; + break; + } + } + uptr TotalReleasedBytes = 0; + if (First != 0U && Last != 0U) { + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + ReleaseRecorder Recorder(Base); + releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions, + BlockSize, &Recorder); + if (Recorder.getReleasedRangesCount() > 0) { + Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; + Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); + Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; } } Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); diff --git a/standalone/primary64.h b/standalone/primary64.h index 8e0224e0450..7bdb7ae6e49 100644 --- a/standalone/primary64.h +++ b/standalone/primary64.h @@ -402,6 +402,18 @@ private: if (BytesPushed < PageSize) return 0; // Nothing new to release. + // Releasing smaller blocks is expensive, so we want to make sure that a + // significant amount of bytes are free, and that there has been a good + // amount of batches pushed to the freelist before attempting to release. + if (BlockSize < PageSize / 16U) { + if (!Force && BytesPushed < Region->AllocatedUser / 16U) + return 0; + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). 
+ if ((BytesInFreeList * 100U) / Region->AllocatedUser < + (100U - 1U - BlockSize / 16U)) + return 0; + } + if (!Force) { const s32 IntervalMs = getReleaseToOsIntervalMs(); if (IntervalMs < 0) @@ -415,7 +427,7 @@ private: ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg, - Region->AllocatedUser, BlockSize, &Recorder); + Region->AllocatedUser, 1U, BlockSize, &Recorder); if (Recorder.getReleasedRangesCount() > 0) { Region->ReleaseInfo.PushedBlocksAtLastRelease = diff --git a/standalone/release.cpp b/standalone/release.cpp index e144b354b25..5d7c6c5fc11 100644 --- a/standalone/release.cpp +++ b/standalone/release.cpp @@ -11,6 +11,6 @@ namespace scudo { HybridMutex PackedCounterArray::Mutex = {}; -uptr PackedCounterArray::StaticBuffer[1024]; +uptr PackedCounterArray::StaticBuffer[PackedCounterArray::StaticBufferCount]; } // namespace scudo diff --git a/standalone/release.h b/standalone/release.h index 323bf9db6dc..b50f36fa0c0 100644 --- a/standalone/release.h +++ b/standalone/release.h @@ -49,7 +49,10 @@ private: // incremented past MaxValue. 
class PackedCounterArray { public: - PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) { + PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion, + uptr MaxValue) + : Regions(NumberOfRegions), NumCounters(CountersPerRegion) { + CHECK_GT(Regions, 0); CHECK_GT(NumCounters, 0); CHECK_GT(MaxValue, 0); constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; @@ -66,10 +69,12 @@ public: PackingRatioLog = getLog2(PackingRatio); BitOffsetMask = PackingRatio - 1; - BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >> - PackingRatioLog) * - sizeof(*Buffer); - if (BufferSize <= StaticBufferSize && Mutex.tryLock()) { + SizePerRegion = + roundUpTo(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >> + PackingRatioLog; + BufferSize = SizePerRegion * sizeof(*Buffer) * Regions; + if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) && + Mutex.tryLock()) { Buffer = &StaticBuffer[0]; memset(Buffer, 0, BufferSize); } else { @@ -88,45 +93,51 @@ public: bool isAllocated() const { return !!Buffer; } - uptr getCount() const { return N; } - uptr get(uptr I) const { - DCHECK_LT(I, N); + uptr getCount() const { return NumCounters; } + + uptr get(uptr Region, uptr I) const { + DCHECK_LT(Region, Regions); + DCHECK_LT(I, NumCounters); const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; - return (Buffer[Index] >> BitOffset) & CounterMask; + return (Buffer[Region * SizePerRegion + Index] >> BitOffset) & CounterMask; } - void inc(uptr I) const { - DCHECK_LT(get(I), CounterMask); + void inc(uptr Region, uptr I) const { + DCHECK_LT(get(Region, I), CounterMask); const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); - Buffer[Index] += static_cast<uptr>(1U) << BitOffset; + Buffer[Region * SizePerRegion + Index] += static_cast<uptr>(1U) + << BitOffset; } - void incRange(uptr From, uptr To) const { + void 
incRange(uptr Region, uptr From, uptr To) const { DCHECK_LE(From, To); - const uptr Top = Min(To + 1, N); + const uptr Top = Min(To + 1, NumCounters); for (uptr I = From; I < Top; I++) - inc(I); + inc(Region, I); } uptr getBufferSize() const { return BufferSize; } + static const uptr StaticBufferCount = 2048U; + private: - const uptr N; + const uptr Regions; + const uptr NumCounters; uptr CounterSizeBitsLog; uptr CounterMask; uptr PackingRatioLog; uptr BitOffsetMask; + uptr SizePerRegion; uptr BufferSize; uptr *Buffer; static HybridMutex Mutex; - static const uptr StaticBufferSize = 1024U; - static uptr StaticBuffer[StaticBufferSize]; + static uptr StaticBuffer[StaticBufferCount]; }; template <class ReleaseRecorderT> class FreePagesRangeTracker { @@ -167,7 +178,8 @@ private: template <class TransferBatchT, class ReleaseRecorderT> NOINLINE void releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, - uptr Size, uptr BlockSize, ReleaseRecorderT *Recorder) { + uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, + ReleaseRecorderT *Recorder) { const uptr PageSize = getPageSizeCached(); // Figure out the number of chunks per page and whether we can take a fast @@ -204,13 +216,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, } } - const uptr PagesCount = roundUpTo(Size, PageSize) / PageSize; - PackedCounterArray Counters(PagesCount, FullPagesBlockCountMax); + const uptr PagesCount = roundUpTo(RegionSize, PageSize) / PageSize; + PackedCounterArray Counters(NumberOfRegions, PagesCount, + FullPagesBlockCountMax); if (!Counters.isAllocated()) return; const uptr PageSizeLog = getLog2(PageSize); - const uptr RoundedSize = PagesCount << PageSizeLog; + const uptr RoundedSize = NumberOfRegions * (PagesCount << PageSizeLog); // Iterate over free chunks and count how many free chunks affect each // allocated page. 
@@ -226,12 +239,13 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) - Counters.inc(P >> PageSizeLog); + if (P < RoundedSize) { + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + const uptr PInRegion = P - RegionIndex * RegionSize; + Counters.inc(RegionIndex, PInRegion >> PageSizeLog); + } } } - for (uptr P = Size; P < RoundedSize; P += BlockSize) - Counters.inc(P >> PageSizeLog); } else { // In all other cases chunks might affect more than one page. for (const auto &It : FreeList) { @@ -242,13 +256,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) - Counters.incRange(P >> PageSizeLog, - (P + BlockSize - 1) >> PageSizeLog); + if (P < RoundedSize) { + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + const uptr PInRegion = P - RegionIndex * RegionSize; + Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, + (PInRegion + BlockSize - 1) >> PageSizeLog); + } } } - for (uptr P = Size; P < RoundedSize; P += BlockSize) - Counters.incRange(P >> PageSizeLog, (P + BlockSize - 1) >> PageSizeLog); } // Iterate over pages detecting ranges of pages with chunk Counters equal @@ -256,8 +271,10 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder); if (SameBlockCountPerPage) { // Fast path, every page has the same number of chunks affecting it. 
- for (uptr I = 0; I < Counters.getCount(); I++) - RangeTracker.processNextPage(Counters.get(I) == FullPagesBlockCountMax); + for (uptr I = 0; I < NumberOfRegions; I++) + for (uptr J = 0; J < PagesCount; J++) + RangeTracker.processNextPage(Counters.get(I, J) == + FullPagesBlockCountMax); } else { // Slow path, go through the pages keeping count how many chunks affect // each page. @@ -268,23 +285,25 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, // except the first and the last one) and then the last chunk size, adding // up the number of chunks on the current page and checking on every step // whether the page boundary was crossed. - uptr PrevPageBoundary = 0; - uptr CurrentBoundary = 0; - for (uptr I = 0; I < Counters.getCount(); I++) { - const uptr PageBoundary = PrevPageBoundary + PageSize; - uptr BlocksPerPage = Pn; - if (CurrentBoundary < PageBoundary) { - if (CurrentBoundary > PrevPageBoundary) - BlocksPerPage++; - CurrentBoundary += Pnc; + for (uptr I = 0; I < NumberOfRegions; I++) { + uptr PrevPageBoundary = 0; + uptr CurrentBoundary = 0; + for (uptr J = 0; J < PagesCount; J++) { + const uptr PageBoundary = PrevPageBoundary + PageSize; + uptr BlocksPerPage = Pn; if (CurrentBoundary < PageBoundary) { - BlocksPerPage++; - CurrentBoundary += BlockSize; + if (CurrentBoundary > PrevPageBoundary) + BlocksPerPage++; + CurrentBoundary += Pnc; + if (CurrentBoundary < PageBoundary) { + BlocksPerPage++; + CurrentBoundary += BlockSize; + } } - } - PrevPageBoundary = PageBoundary; + PrevPageBoundary = PageBoundary; - RangeTracker.processNextPage(Counters.get(I) == BlocksPerPage); + RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage); + } } } RangeTracker.finish(); diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp index a7478f47479..8907520d30c 100644 --- a/standalone/tests/release_test.cpp +++ b/standalone/tests/release_test.cpp @@ -21,14 +21,14 @@ TEST(ScudoReleaseTest, 
PackedCounterArray) { for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) { // Various valid counter's max values packed into one word. - scudo::PackedCounterArray Counters2N(1, 1UL << I); + scudo::PackedCounterArray Counters2N(1U, 1U, 1UL << I); EXPECT_EQ(sizeof(scudo::uptr), Counters2N.getBufferSize()); // Check the "all bit set" values too. - scudo::PackedCounterArray Counters2N1_1(1, ~0UL >> I); + scudo::PackedCounterArray Counters2N1_1(1U, 1U, ~0UL >> I); EXPECT_EQ(sizeof(scudo::uptr), Counters2N1_1.getBufferSize()); // Verify the packing ratio, the counter is Expected to be packed into the // closest power of 2 bits. - scudo::PackedCounterArray Counters(SCUDO_WORDSIZE, 1UL << I); + scudo::PackedCounterArray Counters(1U, SCUDO_WORDSIZE, 1UL << I); EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpToPowerOfTwo(I + 1), Counters.getBufferSize()); } @@ -38,19 +38,19 @@ TEST(ScudoReleaseTest, PackedCounterArray) { // Make sure counters request one memory page for the buffer. const scudo::uptr NumCounters = (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I); - scudo::PackedCounterArray Counters(NumCounters, 1UL << ((1UL << I) - 1)); - Counters.inc(0); + scudo::PackedCounterArray Counters(1U, NumCounters, 1UL << ((1UL << I) - 1)); + Counters.inc(0U, 0U); for (scudo::uptr C = 1; C < NumCounters - 1; C++) { - EXPECT_EQ(0UL, Counters.get(C)); - Counters.inc(C); - EXPECT_EQ(1UL, Counters.get(C - 1)); + EXPECT_EQ(0UL, Counters.get(0U, C)); + Counters.inc(0U, C); + EXPECT_EQ(1UL, Counters.get(0U, C - 1)); } - EXPECT_EQ(0UL, Counters.get(NumCounters - 1)); - Counters.inc(NumCounters - 1); + EXPECT_EQ(0UL, Counters.get(0U, NumCounters - 1)); + Counters.inc(0U, NumCounters - 1); if (I > 0) { - Counters.incRange(0, NumCounters - 1); + Counters.incRange(0U, 0U, NumCounters - 1); for (scudo::uptr C = 0; C < NumCounters; C++) - EXPECT_EQ(2UL, Counters.get(C)); + EXPECT_EQ(2UL, Counters.get(0U, C)); } } } @@ -190,7 +190,7 @@ template <class SizeClassMap> void 
testReleaseFreeMemoryToOS() { // Release the memory. ReleasedPagesRecorder Recorder; - releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, BlockSize, + releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, 1U, BlockSize, &Recorder); // Verify that there are no released pages touched by used chunks and all @@ -240,9 +240,7 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() { if (InFreeRange) { scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize); - const scudo::uptr EndPage = - scudo::roundUpTo(MaxBlocks * BlockSize, PageSize); - while (P + PageSize <= EndPage) { + while (P + PageSize <= MaxBlocks * BlockSize) { const bool PageReleased = Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end(); EXPECT_EQ(true, PageReleased); |